Evaluación de modelos¶

Preparativos¶

In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import spbn_classifier as spbn
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from skopt.space import Real, Categorical, Integer
from sklearn.tree import plot_tree
from sklearn.ensemble import GradientBoostingClassifier
SEED = 12312548
np.random.seed(SEED)

Prepare dataset¶

In [23]:
with open('../../Datos/var_sets.json') as f:
    f_vars = json.load(f)
with open('../../Datos/f_sel.json') as f:
    f_sel = json.load(f)
with open('../../Datos/nombres_verbose.json') as f:
    f_verbose = json.load(f)
df = pd.read_csv('../../Datos/FJD_v9-10_1_din.csv')
df.head()
Out[23]:
REGISTRO Fingplan Fecha_emision Fumador Cardio Pulmonar Diabetes Renal Neuro Onco ... TADprimera TADMin TADMax FCprimera FCMin FCMax TempPrimera TempMin TempMax TiempoIngreso
0 FJD_1 2020-03-22 17:31:12 2020-03-21 18:03:00 Exfumador/a No No Si No No No ... 62.0 47.0 99.0 91.0 34.0 125.0 36.9 35.0 38.8 87.902627
1 FJD_1 2020-03-22 17:31:12 2020-03-23 01:59:00 Exfumador/a No No Si No No No ... 62.0 47.0 99.0 91.0 34.0 125.0 36.9 35.0 38.8 87.902627
2 FJD_1 2020-03-22 17:31:12 2020-03-24 06:44:00 Exfumador/a No No Si No No No ... 62.0 47.0 99.0 91.0 34.0 125.0 36.9 35.0 38.8 87.902627
3 FJD_1 2020-03-22 17:31:12 2020-03-24 18:55:00 Exfumador/a No No Si No No No ... 62.0 47.0 99.0 91.0 34.0 125.0 36.9 35.0 38.8 87.902627
4 FJD_1 2020-03-22 17:31:12 2020-03-25 19:55:00 Exfumador/a No No Si No No No ... 62.0 47.0 99.0 91.0 34.0 125.0 36.9 35.0 38.8 87.902627

5 rows × 88 columns

In [24]:
int_to_cat = [x for x in f_vars['meds_full'] if x in df.columns]
df = df.astype({x: 'str' for x in int_to_cat}).astype({x: 'category' for x in int_to_cat})
In [25]:
to_float = df.columns[df.dtypes == 'int64']
df = df.astype({x: 'float64' for x in to_float})
to_cat = [x for x in df.columns[df.dtypes == object] if x not in ["REGISTRO", "Fingplan", "Faltplan", "Fecha_emision", "Ola"]]
df = df.astype({x: 'category' for x in to_cat})
df['TiempoIngreso'] = pd.qcut(df['TiempoIngreso'], 5).astype(str).astype('category')
In [26]:
df = df.groupby('REGISTRO').aggregate('first')
In [27]:
translate = f_verbose['en']
inv_translate = {v:k for k, v in translate.items()}
df.rename(f_verbose['en'], inplace=True, axis=1, )

Conjunto train, test y validación...

In [28]:
df_train = df[~df['esTest'] & ~df['esVal']].drop(['esTest', 'esVal'], axis=1)
df_test = df[df['esTest'] & ~df['esVal']].drop(['esTest', 'esVal'], axis=1)
df_val = df[~df['esTest'] & df['esVal']].drop(['esTest', 'esVal'], axis=1)
In [29]:
from sklearn.metrics import roc_auc_score, confusion_matrix, ConfusionMatrixDisplay, RocCurveDisplay, brier_score_loss
from sklearn.calibration import CalibrationDisplay
from joblib import dump, load
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, OrdinalEncoder, OneHotEncoder
import shap
import pprint
from IPython.display import HTML, display
import tabulate


class MyPipeline(Pipeline):
    # https://stackoverflow.com/questions/33469633/how-to-transform-items-using-sklearn-pipeline
    def just_transforms(self, X):
        """Applies all transforms to the data, without applying last 
        estimator.

        Parameters
        ----------
        X : iterable
            Data to predict on. Must fulfill input requirements of first step of
            the pipeline.
        """
        Xt = X
        for name, transform in self.steps[:-1]:
            Xt = transform.transform(Xt)
        return self.steps[-1][1], Xt


def dict_to_table(d, header=['', '']):
    arr = [[a,b] for a,b in d.items()]
    table = tabulate.tabulate(arr, tablefmt='html', headers=header)
    display(HTML(table))

def variables_olas(olas):
    olas_str = [str(x) for x in olas]
    variables = set()
    #variables = variables.union(set(f_sel['rf_' + '.'.join(olas_str)]['EXITUS']))
    variables = variables.union(set(f_sel['gbm_' + '.'.join(olas_str)]['EXITUS']))
    variables = [translate[v] for v in variables]
    return list(variables)

def seleccion_olas(df, olas):
    variables = variables_olas(olas)
    return df[df[translate['Ola']].isin(olas)][variables + [translate['EXITUS']]]


def model_tune(model, tune_grid, X_train, y_train, scale=False, encode=None, n_iter=50, verbose=0):
    steps = []
    if encode == 'ordinal':
        steps.append(('encode', OrdinalEncoder()))
    elif encode == 'ohe':
        steps.append(('encode', OneHotEncoder()))
    if scale:
        steps.append(('scale', RobustScaler()))
    if steps:
        steps.append(('model', model))
        model = MyPipeline(steps)
        tune_grid = {f'model__{p}': tune_grid[p] for p in tune_grid.keys()}
    tuned_model = BayesSearchCV(
        model, tune_grid, n_jobs=-1, cv=5, random_state=SEED, scoring='roc_auc', n_iter=n_iter, verbose=verbose
    ).fit(X_train, y_train)
    print(f'Tune result: {tuned_model.best_score_} auc.')
    return tuned_model.best_estimator_, tuned_model.best_score_

def model_report(model, X, y, id_persistencia=None):
    out = {'metrics': {}, 'model': model, 'y': y}
    feature_names = list(X.columns)
    out['feature_names'] = feature_names
    if str(type(model)) == str(MyPipeline):
        model, X = model.just_transforms(X)
    metrics = out['metrics']
    
    out['y_prob'] = y_prob = model.predict_proba(X)[:, 1]
    metrics['roc'] = roc_auc_score(y, y_prob)
    metrics['brier_loss'] = brier_score_loss(y, y_prob, pos_label="S")

    out['y_pred'] = y_pred = model.predict(X)
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    metrics['specificity'] = tn / (tn + fp)
    metrics['sensitivity'] = tp / (tp + fn)
    metrics['accuracy'] = (tp + tn) / (tp + tn + fp + fn)
    if id_persistencia:
        dump(out, id_persistencia)
    return out


def shap_plot(report, X_train, X_val, shap_explainer=None):
    if shap_explainer:
        model = report['model']
        if str(type(model)) == str(MyPipeline):
            _, X_train = model.just_transforms(X_train)
            model, X_val = model.just_transforms(X_val)
        if shap_explainer == shap.TreeExplainer:
            shap_values = shap_explainer(model, X_train, seed=SEED).shap_values(X_train, check_additivity=False)
        else:
            shap_values = shap_explainer(model, X_train, seed=SEED).shap_values(X_train)
        report['shap'] = shap_values
        _ = plt.figure(figsize=(15,6))
        if type(shap_values) == type([]):
            shap_values = shap_values[1]
        shap.summary_plot(shap_values, X_train, report['feature_names'], show=False, plot_size=None)
        plt.show()


def plot_report(report, model_repr=None):
    fig, axs = plt.subplot_mosaic([['left', 'right'],['bottom', 'bottom']],
                                  figsize=(10, 10), constrained_layout=True)
    RocCurveDisplay.from_predictions(report['y'], report['y_prob'], pos_label='S', ax=axs['left'])
    axs['left'].set_xticks(np.arange(0, 1, step=0.1))
    axs['left'].set_yticks(np.arange(0, 1, step=0.1))
    CalibrationDisplay.from_predictions(report['y'], report['y_prob'], pos_label='S', ax=axs['right'])
    axs['right'].set_xticks(np.arange(0, 1, step=0.1))
    axs['right'].set_yticks(np.arange(0, 1, step=0.1))
    ConfusionMatrixDisplay.from_predictions(report['y'], report['y_pred'], ax=axs['bottom'])
    plt.show()

    dict_to_table(report['metrics'], ['Metric', 'Value'])
    model = report['model']
    if model_repr:
        if str(type(model)) == str(MyPipeline):
            model = model.steps[-1][1]
        model_repr(model, report['feature_names'])
    dict_to_table(model.get_params(), ['Hyperparameter', 'Value'])

def generar_id(modelo, olas, disc=False, es_test=False):
    if modelo is None:
        return None
    return ('modelosPersistidos/' + 
            modelo + 
            '_'.join([str(o) for o in olas]) + 
            ('_disc' if disc else '') + 
            ('_test' if es_test else '') + 
            '.joblib')


def model_demonstrate(olas, model, tune_grid, df_train, df_val, scale=False, encode=None,
                      shap_explainer=None, model_repr=None, class_name='EXITUS', nombre_modelo=None,
                      n_iter=50, verbose=0, es_test=False):
    df_train_ola = seleccion_olas(df_train, olas)
    df_val_ola = seleccion_olas(df_val, olas)
    class_name = translate[class_name]
    predictors_ola = [x for x in df_train_ola.columns if x != class_name]
    X_train, y_train = df_train_ola[predictors_ola], df_train_ola[class_name]
    X_val, y_val = df_val_ola[predictors_ola], df_val_ola[class_name]

    if nombre_modelo is None:
        nombre_modelo = model.__class__.__name__

    if n_iter > 0:
        print(f'Tuning params {", ".join(list(tune_grid.keys()))}')
        tuned_model, train_score = model_tune(model, tune_grid, X_train, y_train, scale=scale, n_iter=n_iter, verbose=verbose)
    else:
        tuned_model, train_score = model.fit(X_train, y_train), None

    id_modelo = generar_id(nombre_modelo, olas, disc=(encode is not None), es_test=es_test)
    report = model_report(
        tuned_model, X_val, y_val, id_persistencia=id_modelo)
    shap_plot(report, X_train, X_val, shap_explainer)
    report['train_score'] = train_score
    plot_report(report, model_repr=model_repr)
    return report, (X_train, y_train, X_val, y_val)
    

Regresión Logística¶

In [111]:
def lr_repr(lr, feature_names):
    ws = {f: lr.coef_[0][i] for i, f in enumerate(feature_names)}
    ws = dict(sorted(ws.items(), key=lambda item: abs(item[1]), reverse=True))
    dict_to_table(ws, ['Feature', 'Weight'])
In [20]:
model = LogisticRegression(penalty='elasticnet', solver='saga', n_jobs=-1)
search_space = {
    'C': Real(0.1, 100, prior='log-uniform'),
    'l1_ratio': Real(0, 1, prior='uniform'),
    'class_weight': Categorical(['balanced', None])
}

Ola 1¶

In [21]:
report, _ = model_demonstrate([1], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.9028736451430929 auc.
Metric Value
roc 0.872247
brier_loss 0.130027
specificity0.902439
sensitivity0.6
accuracy 0.808989
Feature Weight
Age 1.29664
Oxygen saturation (daily maximum) -0.641471
Body temperaure (daily maximum) 0.554038
Lactate dehydrogenase (LDH) 0.470849
Heart rate (daily maximum) 0.416539
Red Cell Blood Distribution Width (RDW) 0.403401
Mean corpuscular volume 0.35603
Mean corpuscular hemoglobin concentration (MCHC)-0.25308
Glucose 0.251309
Albumin -0.249037
Blood urea nitrogen (BUN) 0.239575
Platelets -0.228973
Calcium -0.189948
Lymphocyte % -0.185796
Hemoglobin -0.171281
Diastolic blood pressure (daily maximum) -0.15559
Partial pressure of oxygen (Blood gas test) 0.153068
Heart rate (daily minimum) 0.143473
Oxygen saturation (daily minimum) -0.137545
Ferritin 0.0892244
Urea 0.06244
Systolic blood pressure (daily maximum) 0.0406559
Body temperature (daily minimum) 0.0404112
International normalized ratio (INR) 0.0374522
Lymphocyte count -0.0227599
Eosinophil % -0.016723
Red blood cells -0.0115366
Prothrombin time (PT) 0.000732271
Diastolic blood pressure (daily minimum) 0
Hematocrit 0
Hyperparameter Value
C 0.27166552334797633
class_weight
dual False
fit_intercept True
intercept_scaling1
l1_ratio 0.7965934415128313
max_iter 100
multi_class auto
n_jobs -1
penalty elasticnet
random_state
solver saga
tol 0.0001
verbose 0
warm_start False
0.9028736451430929

Ola 2¶

In [22]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.8778755141912032 auc.
Metric Value
roc 0.876304
brier_loss 0.0917259
specificity0.966216
sensitivity0.350877
accuracy 0.866856
Feature Weight
Age 1.52897
Red Cell Blood Distribution Width (RDW) 0.411721
Heart rate (daily maximum) 0.368521
Body temperaure (daily maximum) 0.360037
Estimated glomerular filtration rate (eGFR) ckd-epi-0.313
Systolic blood pressure (first measure) -0.312372
Mean corpuscular volume 0.309369
Partial pressure of oxygen (Blood gas test) 0.237908
Lactate dehydrogenase (LDH) 0.215156
Partial pressure of CO2 (Blood gas test) -0.214141
Diastolic blood pressure (daily maximum) 0.21212
Body temperature (daily minimum) 0.205722
Platelets -0.1838
Total CO2 (blood gas test) 0.175816
Albumin -0.15411
Oxygen saturation (daily maximum) -0.153838
Blood urea nitrogen (BUN) 0.140116
Hematocrit -0.130762
Lymphocyte % -0.117527
Glucose 0.115247
Systolic blood pressure (daily maximum) 0.0842292
Urea 0.066126
Oxygen saturation (daily minimum) -0.0605084
Monocytes % -0.0583326
C-reactive protein 0.0581361
Current bicarbonate (blood gas test) -0.0377462
International normalized ratio (INR) 0.0169834
D-Dimer 0.0143832
Segmented neutrophils % -0.0042528
Hyperparameter Value
C 2.3396397269905416
class_weight
dual False
fit_intercept True
intercept_scaling1
l1_ratio 1.0
max_iter 100
multi_class auto
n_jobs -1
penalty elasticnet
random_state
solver saga
tol 0.0001
verbose 0
warm_start False
0.8778755141912032

Olas 3, 4 y 5¶

In [23]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.8480042952774796 auc.
Metric Value
roc 0.824806
brier_loss 0.172728
specificity0.730233
sensitivity0.740741
accuracy 0.731405
Feature Weight
Age 1.52859
Body temperaure (daily maximum) 0.571957
Red Cell Blood Distribution Width (RDW) 0.325664
Lymphocyte % -0.280928
Blood urea nitrogen (BUN) 0.242536
Lactate dehydrogenase (LDH) 0.210772
Heart rate (daily maximum) 0.166012
Albumin -0.131302
Calcium -0.130377
Heart rate (daily minimum) 0.119985
Oxygen saturation (daily minimum) -0.106787
Partial Thromboplastin Time ratio 0.0791531
D-Dimer 0.0576698
Lymphocyte count 0.0540325
Heart rate (first measure) 0.0468054
Activated Partial Thromboplastin Time (aPTT) 0.0104338
Monocytes % 0
Current bicarbonate (blood gas test) 0
Hyperparameter Value
C 0.1
class_weight balanced
dual False
fit_intercept True
intercept_scaling1
l1_ratio 0.5009484933418734
max_iter 100
multi_class auto
n_jobs -1
penalty elasticnet
random_state
solver saga
tol 0.0001
verbose 0
warm_start False
0.8480042952774796

Ola 6¶

In [24]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.8114693400167085 auc.
Metric Value
roc 0.827214
brier_loss 0.0975671
specificity0.961538
sensitivity0.136364
accuracy 0.859551
Feature Weight
Age 1.37553
Heart rate (daily maximum) 0.491425
Hematocrit -0.299552
Systolic blood pressure (daily minimum) 0.293022
Systolic blood pressure (first measure) -0.275652
Lactate dehydrogenase (LDH) 0.26678
Mean corpuscular volume 0.256462
Systolic blood pressure (daily maximum) -0.248719
Glucose 0.238793
C-reactive protein 0.190043
Urea 0.182359
Oxygen saturation (daily maximum) -0.167072
Partial Thromboplastin Time ratio 0.161217
Hemolysis index 0.15631
Derived fibrinogen 0.150584
Hemoglobin -0.143416
Mean corpuscular hemoglobin concentration (MCHC)-0.138783
Monocytes % -0.138
Creatinine 0.115695
Segmented neutrophils % 0.0935264
Oxygen saturation (first measure) 0.0856922
Oxygen saturation (daily minimum) -0.085041
Heart rate (first measure) -0.0607562
D-Dimer -0.0336893
Blood urea nitrogen (BUN) 0.0242479
Prothrombin time (PT) -0.0242128
Eosinophil % -0.00892222
Lymphocyte % 0.00803419
Activated Partial Thromboplastin Time (aPTT) 0
Hyperparameter Value
C 0.6576313755130081
class_weight
dual False
fit_intercept True
intercept_scaling1
l1_ratio 0.27956404472462143
max_iter 100
multi_class auto
n_jobs -1
penalty elasticnet
random_state
solver saga
tol 0.0001
verbose 0
warm_start False
0.8114693400167085

Decision Tree¶

In [25]:
model = DecisionTreeClassifier(random_state=SEED, )
search_space = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'max_depth': Integer(1, 8),
    'min_samples_split': Integer(2, 15),
    'min_samples_leaf': Integer(1, 10),
    'ccp_alpha': Real(0, 0.035),
    'class_weight': Categorical(['balanced', None]),
    'min_impurity_decrease': Real(0, 0.1),
    'max_features': Categorical(['sqrt', 'log2', None])
}
def dt_repr(dt, feature_names=None, figsize=(20, 10), fontsize=7, max_depth=3):
    if feature_names is None:
        feature_names = dt.feature_names_in_
    plt.figure(figsize=figsize)
    plot_tree(dt,max_depth=max_depth, feature_names=feature_names, label='none', class_names=['Survive', 'Death'],
              filled=True, rounded=True, proportion=True, impurity=False, precision=2, fontsize=fontsize)
    plt.show()

Ola 1¶

In [26]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
Tune result: 0.8486132080358292 auc.
Metric Value
roc 0.80109
brier_loss 0.174041
specificity0.817073
sensitivity0.718182
accuracy 0.786517
Hyperparameter Value
ccp_alpha 0.0
class_weight balanced
criterion gini
max_depth 6
max_features
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 10
min_samples_split 2
min_weight_fraction_leaf0.0
random_state 12312548
splitter best
0.8486132080358292

Ola 2¶

In [27]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features
Tune result: 0.8015639406033493 auc.
Metric Value
roc 0.796408
brier_loss 0.174694
specificity0.574324
sensitivity0.894737
accuracy 0.626062
Hyperparameter Value
ccp_alpha 0.035
class_weight balanced
criterion entropy
max_depth 8
max_features
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 7
min_samples_split 2
min_weight_fraction_leaf0.0
random_state 12312548
splitter best
0.8015639406033493

Olas 3, 4 y 5¶

In [28]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features
Tune result: 0.8242380451918023 auc.
Metric Value
roc 0.796985
brier_loss 0.189001
specificity0.506977
sensitivity1
accuracy 0.561983
Hyperparameter Value
ccp_alpha 0.0
class_weight balanced
criterion log_loss
max_depth 7
max_features
max_leaf_nodes
min_impurity_decrease 0.029163250328520157
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
random_state 12312548
splitter best
0.8242380451918023

Ola 6¶

In [29]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features
Tune result: 0.7657213403880071 auc.
Metric Value
roc 0.783362
brier_loss 0.187843
specificity0.794872
sensitivity0.727273
accuracy 0.786517
Hyperparameter Value
ccp_alpha 0.02649888916025202
class_weight balanced
criterion entropy
max_depth 4
max_features
max_leaf_nodes
min_impurity_decrease 0.009205638824541569
min_samples_leaf 6
min_samples_split 15
min_weight_fraction_leaf0.0
random_state 12312548
splitter best
0.7657213403880071

GBM¶

In [98]:
model = GradientBoostingClassifier(random_state=SEED)
search_space = {
    'n_estimators': Integer(5, 150),
    'learning_rate': Real(0.05, 0.2),
    'max_features': Categorical(['log2', None]),
    'n_iter_no_change': Integer(1, 10),
}

Ola 1¶

In [101]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change
Tune result: 0.9155530287710902 auc.
Metric Value
roc 0.88289
brier_loss 0.123538
specificity0.918699
sensitivity0.609091
accuracy 0.823034
Hyperparameter Value
ccp_alpha 0.0
criterion friedman_mse
init
learning_rate 0.05
loss log_loss
max_depth 3
max_features log2
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
n_estimators 150
n_iter_no_change 10
random_state 12312548
subsample 1.0
tol 0.0001
validation_fraction 0.1
verbose 0
warm_start False
0.9155530287710902

Ola 2¶

In [32]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change
Tune result: 0.8710921804757454 auc.
Metric Value
roc 0.884068
brier_loss 0.0925896
specificity0.976351
sensitivity0.280702
accuracy 0.864023
Hyperparameter Value
ccp_alpha 0.0
criterion friedman_mse
init
learning_rate 0.18859591001753956
loss log_loss
max_depth 3
max_features log2
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
n_estimators 64
n_iter_no_change 6
random_state 12312548
subsample 1.0
tol 0.0001
validation_fraction 0.1
verbose 0
warm_start False
0.8710921804757454

Ola 3, 4 y 5¶

In [33]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change
Tune result: 0.8652028594093621 auc.
Metric Value
roc 0.891473
brier_loss 0.0747619
specificity0.981395
sensitivity0.148148
accuracy 0.88843
Hyperparameter Value
ccp_alpha 0.0
criterion friedman_mse
init
learning_rate 0.08410300395207498
loss log_loss
max_depth 3
max_features log2
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
n_estimators 100
n_iter_no_change 8
random_state 12312548
subsample 1.0
tol 0.0001
validation_fraction 0.1
verbose 0
warm_start False
0.8652028594093621

Ola 6¶

In [34]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change
The objective has been evaluated at this point before.
Tune result: 0.8310113246078158 auc.
Metric Value
roc 0.818036
brier_loss 0.0980626
specificity0.99359
sensitivity0.0909091
accuracy 0.882022
Hyperparameter Value
ccp_alpha 0.0
criterion friedman_mse
init
learning_rate 0.06464321510220243
loss log_loss
max_depth 3
max_features log2
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
n_estimators 107
n_iter_no_change 3
random_state 12312548
subsample 1.0
tol 0.0001
validation_fraction 0.1
verbose 0
warm_start False
0.8310113246078158

Naive-Bayes CLG¶

In [11]:
def bn_repr(bn, feature_names):
    bn.display()
In [9]:
model = spbn.KDBBNClassifierCLG(k=0, random_state=SEED, n_jobs=-1)
nombre_modelo = 'Naive-Bayes-CLG'
search_space = {
    'mi_thres': Categorical([0, 0.1]),
    'mi_nneighbors': Categorical([5, 10, 50, 100, 200]),
}

Ola 1¶

In [11]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8692631197044305 auc.
Metric Value
roc 0.839135
brier_loss 0.202066
specificity0.878049
sensitivity0.554545
accuracy 0.77809
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Calcium Calcium Eosinophil % Eosinophil % Urea Urea Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Age Age Ferritin Ferritin Platelets Platelets Albumin Albumin Hemoglobin Hemoglobin Mean corpuscular volume Mean corpuscular volume Heart rate (daily minimum) Heart rate (daily minimum) Lymphocyte % Lymphocyte % Prothrombin time (PT) Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Red blood cells Red blood cells International normalized ratio (INR) International normalized ratio (INR) Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Lymphocyte count Lymphocyte count Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Hematocrit Hematocrit Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Body temperature (daily minimum) Death->Oxygen saturation (daily maximum) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Eosinophil % Death->Urea Death->Systolic blood pressure (daily maximum) Death->Partial pressure of oxygen (Blood gas test) Death->Age Death->Ferritin Death->Platelets Death->Albumin Death->Hemoglobin Death->Mean corpuscular volume Death->Heart rate (daily minimum) Death->Lymphocyte % Death->Prothrombin time (PT) Death->Lactate dehydrogenase (LDH) Death->Red blood cells Death->International normalized ratio (INR) Death->Diastolic blood pressure (daily minimum) Death->Lymphocyte count Death->Oxygen saturation (daily minimum) Death->Hematocrit Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 0
mi_nneighbors 200
mi_thres 0.0
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8692631197044305
In [12]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8439311399450586 auc.
Metric Value
roc 0.847202
brier_loss 0.149663
specificity0.89527
sensitivity0.491228
accuracy 0.830028
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Urea Urea D-Dimer D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Segmented neutrophils % Segmented neutrophils % Age Age Total CO2 (blood gas test) Total CO2 (blood gas test) Platelets Platelets Albumin Albumin Mean corpuscular volume Mean corpuscular volume C-reactive protein C-reactive protein Lymphocyte % Lymphocyte % Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) International normalized ratio (INR) International normalized ratio (INR) Monocytes % Monocytes % Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Hematocrit Hematocrit Systolic blood pressure (first measure) Systolic blood pressure (first measure) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Partial pressure of CO2 (Blood gas test) Death->Red Cell Blood Distribution Width (RDW) Death->Urea Death->Systolic blood pressure (daily maximum) Death->Age Death->Albumin Death->International normalized ratio (INR) Death->Systolic blood pressure (first measure) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 0
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8439311399450586
In [13]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8295335925872991 auc.
Metric Value
roc 0.822394
brier_loss 0.120831
specificity0.925581
sensitivity0.37037
accuracy 0.863636
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Calcium Calcium Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) D-Dimer D-Dimer Age Age Albumin Albumin Heart rate (daily minimum) Heart rate (daily minimum) Lymphocyte % Lymphocyte % Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Heart rate (first measure) Heart rate (first measure) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Lymphocyte count Lymphocyte count Monocytes % Monocytes % Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Death Death Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Partial Thromboplastin Time ratio Death->Age Death->Albumin Death->Heart rate (daily minimum) Death->Heart rate (first measure) Death->Lymphocyte count Death->Oxygen saturation (daily minimum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 0
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8295335925872991
In [14]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.7908185278009839 auc.
Metric Value
roc 0.746503
brier_loss 0.166789
specificity0.878205
sensitivity0.227273
accuracy 0.797753
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Heart rate (daily maximum) Heart rate (daily maximum) Derived fibrinogen Derived fibrinogen Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Eosinophil % Eosinophil % Oxygen saturation (first measure) Oxygen saturation (first measure) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Urea Urea D-Dimer D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Creatinine Creatinine Segmented neutrophils % Segmented neutrophils % Age Age Hemoglobin Hemoglobin Mean corpuscular volume Mean corpuscular volume C-reactive protein C-reactive protein Lymphocyte % Lymphocyte % Prothrombin time (PT) Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Heart rate (first measure) Heart rate (first measure) Hemolysis index Hemolysis index Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Monocytes % Monocytes % Systolic blood pressure (daily minimum) Systolic blood pressure (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Hematocrit Hematocrit Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Systolic blood pressure (first measure) Systolic blood pressure (first measure) Death Death Death->Glucose Death->Heart rate (daily maximum) Death->Urea Death->Age Death->Hemoglobin Death->Mean corpuscular volume Death->Lymphocyte % Death->Prothrombin time (PT) Death->Heart rate (first measure) Death->Activated Partial Thromboplastin Time (aPTT) Death->Monocytes % Death->Hematocrit Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Systolic blood pressure (first measure)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 0
mi_nneighbors 10
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.7908185278009839

Chow Liu Tan CLG¶

In [15]:
model = spbn.CLTANBNClassifierCLG(random_state=SEED, n_jobs=-1)
nombre_modelo = 'Chow-Liu-TAN-CLG'
search_space = {
    'mi_thres': Categorical([0, 0.1]),
    'mi_nneighbors': Categorical([5, 10, 50, 100, 200]),
}

Ola 1¶

In [16]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8645437091158602 auc.
Metric Value
roc 0.841537
brier_loss 0.195423
specificity0.878049
sensitivity0.554545
accuracy 0.77809
Glucose Glucose Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Glucose->Oxygen saturation (daily maximum) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Oxygen saturation (daily maximum)->Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Oxygen saturation (daily maximum)->Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily maximum)->Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily maximum)->Body temperature (daily minimum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Oxygen saturation (daily maximum)->Red Cell Blood Distribution Width (RDW) Calcium Calcium Oxygen saturation (daily maximum)->Calcium Eosinophil % Eosinophil % Oxygen saturation (daily maximum)->Eosinophil % Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily maximum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Oxygen saturation (daily maximum)->Partial pressure of oxygen (Blood gas test) Age Age Oxygen saturation (daily maximum)->Age Ferritin Ferritin Oxygen saturation (daily maximum)->Ferritin Platelets Platelets Oxygen saturation (daily maximum)->Platelets Albumin Albumin Oxygen saturation (daily maximum)->Albumin Mean corpuscular volume Mean corpuscular volume Oxygen saturation (daily maximum)->Mean corpuscular volume Heart rate (daily minimum) Heart rate (daily minimum) Oxygen saturation (daily maximum)->Heart rate (daily minimum) Lymphocyte count Lymphocyte count Oxygen saturation (daily maximum)->Lymphocyte count International normalized ratio (INR) International normalized ratio (INR) Oxygen saturation (daily maximum)->International normalized ratio (INR) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) Red blood cells Red blood cells Oxygen saturation (daily maximum)->Red blood cells Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Oxygen saturation (daily maximum)->Diastolic blood pressure (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily maximum)->Oxygen saturation (daily minimum) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Oxygen saturation (daily maximum)->Mean corpuscular hemoglobin concentration (MCHC) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Diastolic blood pressure (daily maximum) Hemoglobin Hemoglobin Hematocrit Hematocrit Hematocrit->Hemoglobin Lymphocyte % Lymphocyte % Lymphocyte count->Lymphocyte % Prothrombin time (PT) Prothrombin time (PT) International normalized ratio (INR)->Prothrombin time (PT) Red blood cells->Hematocrit Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Oxygen saturation (daily maximum) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Body temperature (daily minimum) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Eosinophil % Death->Urea Death->Systolic blood pressure (daily maximum) Death->Partial pressure of oxygen (Blood gas test) Death->Age Death->Ferritin Death->Platelets Death->Albumin Death->Hemoglobin Death->Hematocrit Death->Mean corpuscular volume Death->Heart rate (daily minimum) Death->Lymphocyte % Death->Lymphocyte count Death->Prothrombin time (PT) Death->International normalized ratio (INR) Death->Lactate dehydrogenase (LDH) Death->Red blood cells Death->Diastolic blood pressure (daily minimum) Death->Oxygen saturation (daily minimum) Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 0
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8645437091158602
In [17]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8267598351885397 auc.
Metric Value
roc 0.845869
brier_loss 0.152701
specificity0.885135
sensitivity0.508772
accuracy 0.824363
Glucose Glucose Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Glucose->Oxygen saturation (daily maximum) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Urea->Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Oxygen saturation (daily maximum)->Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily maximum)->Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily maximum)->Body temperature (daily minimum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Oxygen saturation (daily maximum)->Red Cell Blood Distribution Width (RDW) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Oxygen saturation (daily maximum)->Current bicarbonate (blood gas test) Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Oxygen saturation (daily maximum)->Estimated glomerular filtration rate (eGFR) ckd-epi D-Dimer D-Dimer Oxygen saturation (daily maximum)->D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily maximum) Monocytes % Monocytes % Oxygen saturation (daily maximum)->Monocytes % Platelets Platelets Oxygen saturation (daily maximum)->Platelets Albumin Albumin Oxygen saturation (daily maximum)->Albumin Mean corpuscular volume Mean corpuscular volume Oxygen saturation (daily maximum)->Mean corpuscular volume C-reactive protein C-reactive protein Oxygen saturation (daily maximum)->C-reactive protein Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) International normalized ratio (INR) International normalized ratio (INR) Oxygen saturation (daily maximum)->International normalized ratio (INR) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily maximum)->Oxygen saturation (daily minimum) Hematocrit Hematocrit Oxygen saturation (daily maximum)->Hematocrit Systolic blood pressure (first measure) Systolic blood pressure (first measure) Oxygen saturation (daily maximum)->Systolic blood pressure (first measure) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Diastolic blood pressure (daily maximum) Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Partial pressure of CO2 (Blood gas test)->Partial pressure of oxygen (Blood gas test) Total CO2 (blood gas test) Total CO2 (blood gas test) Total CO2 (blood gas test)->Partial pressure of CO2 (Blood gas test) Current bicarbonate (blood gas test)->Total CO2 (blood gas test) Estimated glomerular filtration rate (eGFR) ckd-epi->Urea Age Age Estimated glomerular filtration rate (eGFR) ckd-epi->Age Segmented neutrophils % Segmented neutrophils % Lymphocyte % Lymphocyte % Segmented neutrophils %->Lymphocyte % Monocytes %->Segmented neutrophils % Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Urea Death->Body temperaure (daily maximum) Death->Oxygen saturation (daily maximum) Death->Heart rate (daily maximum) Death->Body temperature (daily minimum) Death->Partial pressure of CO2 (Blood gas test) Death->Total CO2 (blood gas test) Death->Red Cell Blood Distribution Width (RDW) Death->Current bicarbonate (blood gas test) Death->Estimated glomerular filtration rate (eGFR) ckd-epi Death->D-Dimer Death->Systolic blood pressure (daily maximum) Death->Partial pressure of oxygen (Blood gas test) Death->Segmented neutrophils % Death->Monocytes % Death->Age Death->Platelets Death->Albumin Death->Mean corpuscular volume Death->C-reactive protein Death->Lymphocyte % Death->Lactate dehydrogenase (LDH) Death->International normalized ratio (INR) Death->Oxygen saturation (daily minimum) Death->Hematocrit Death->Systolic blood pressure (first measure) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 0
mi_nneighbors 200
mi_thres 0.0
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8267598351885397
In [18]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.7858673734782025 auc.
Metric Value
roc 0.799655
brier_loss 0.13268
specificity0.92093
sensitivity0.333333
accuracy 0.855372
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Age Age Blood urea nitrogen (BUN)->Age D-Dimer D-Dimer Blood urea nitrogen (BUN)->D-Dimer Body temperaure (daily maximum) Body temperaure (daily maximum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum)->Body temperaure (daily maximum) Heart rate (first measure) Heart rate (first measure) Oxygen saturation (daily minimum)->Heart rate (first measure) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Oxygen saturation (daily minimum)->Red Cell Blood Distribution Width (RDW) Albumin Albumin Oxygen saturation (daily minimum)->Albumin Heart rate (daily minimum) Heart rate (daily minimum) Oxygen saturation (daily minimum)->Heart rate (daily minimum) Heart rate (daily maximum) Heart rate (daily maximum) Heart rate (first measure)->Heart rate (daily maximum) Calcium Calcium Albumin->Calcium Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Albumin->Lactate dehydrogenase (LDH) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Partial Thromboplastin Time ratio->Activated Partial Thromboplastin Time (aPTT) Age->Oxygen saturation (daily minimum) Age->Partial Thromboplastin Time ratio Lymphocyte count Lymphocyte count Age->Lymphocyte count Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Lymphocyte count->Current bicarbonate (blood gas test) Lymphocyte % Lymphocyte % Lymphocyte count->Lymphocyte % Monocytes % Monocytes % Lymphocyte count->Monocytes % Death Death Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Oxygen saturation (daily minimum) Death->Heart rate (daily maximum) Death->Heart rate (first measure) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Albumin Death->Partial Thromboplastin Time ratio Death->Age Death->Current bicarbonate (blood gas test) Death->Lymphocyte count Death->D-Dimer Death->Heart rate (daily minimum) Death->Lymphocyte % Death->Lactate dehydrogenase (LDH) Death->Activated Partial Thromboplastin Time (aPTT) Death->Monocytes %
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 0
mi_nneighbors 5
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.7858673734782025
In [19]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.707173173674928 auc.
Metric Value
roc 0.730478
brier_loss 0.190851
specificity0.858974
sensitivity0.227273
accuracy 0.780899
Glucose Glucose Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Glucose->Oxygen saturation (daily maximum) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Age Age Age->Blood urea nitrogen (BUN) Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily maximum)->Age Oxygen saturation (daily maximum)->Heart rate (daily maximum) Derived fibrinogen Derived fibrinogen Oxygen saturation (daily maximum)->Derived fibrinogen Eosinophil % Eosinophil % Oxygen saturation (daily maximum)->Eosinophil % Oxygen saturation (first measure) Oxygen saturation (first measure) Oxygen saturation (daily maximum)->Oxygen saturation (first measure) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Oxygen saturation (daily maximum)->Activated Partial Thromboplastin Time (aPTT) D-Dimer D-Dimer Oxygen saturation (daily maximum)->D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily maximum) Monocytes % Monocytes % Oxygen saturation (daily maximum)->Monocytes % Hemoglobin Hemoglobin Oxygen saturation (daily maximum)->Hemoglobin Mean corpuscular volume Mean corpuscular volume Oxygen saturation (daily maximum)->Mean corpuscular volume Prothrombin time (PT) Prothrombin time (PT) Oxygen saturation (daily maximum)->Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) Heart rate (first measure) Heart rate (first measure) Oxygen saturation (daily maximum)->Heart rate (first measure) Hemolysis index Hemolysis index Oxygen saturation (daily maximum)->Hemolysis index Systolic blood pressure (daily minimum) Systolic blood pressure (daily minimum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily minimum) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Oxygen saturation (daily maximum)->Mean corpuscular hemoglobin concentration (MCHC) Systolic blood pressure (first measure) Systolic blood pressure (first measure) Oxygen saturation (daily maximum)->Systolic blood pressure (first measure) C-reactive protein C-reactive protein Derived fibrinogen->C-reactive protein Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (first measure)->Oxygen saturation (daily minimum) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Activated Partial Thromboplastin Time (aPTT)->Partial Thromboplastin Time ratio Creatinine Creatinine Urea->Creatinine Segmented neutrophils % Segmented neutrophils % Lymphocyte % Lymphocyte % Segmented neutrophils %->Lymphocyte % Monocytes %->Segmented neutrophils % Hematocrit Hematocrit Hemoglobin->Hematocrit Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Age Death->Heart rate (daily maximum) Death->Oxygen saturation (daily maximum) Death->Derived fibrinogen Death->Eosinophil % Death->Oxygen saturation (first measure) Death->Partial Thromboplastin Time ratio Death->Activated Partial Thromboplastin Time (aPTT) Death->Urea Death->D-Dimer Death->Systolic blood pressure (daily maximum) Death->Creatinine Death->Segmented neutrophils % Death->Monocytes % Death->Hemoglobin Death->Mean corpuscular volume Death->C-reactive protein Death->Lymphocyte % Death->Prothrombin time (PT) Death->Lactate dehydrogenase (LDH) Death->Heart rate (first measure) Death->Hemolysis index Death->Systolic blood pressure (daily minimum) Death->Oxygen saturation (daily minimum) Death->Hematocrit Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Systolic blood pressure (first measure)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 0
mi_nneighbors 100
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.707173173674928

2-DB CLG¶

In [20]:
model = spbn.KDBBNClassifierCLG(k=2, random_state=SEED, n_jobs=-1)
nombre_modelo = '2DB-BNC-CLG'
search_space = {
    'mi_thres': Categorical([0, 0.1]),
    'mi_nneighbors': Categorical([5, 10, 50, 100, 200]),
}

Ola 1¶

In [21]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8590092589799312 auc.
Metric Value
roc 0.838452
brier_loss 0.199964
specificity0.886179
sensitivity0.518182
accuracy 0.772472
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Body temperature (daily minimum)->Partial pressure of oxygen (Blood gas test) Heart rate (daily minimum) Heart rate (daily minimum) Body temperature (daily minimum)->Heart rate (daily minimum) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Body temperature (daily minimum)->Lactate dehydrogenase (LDH) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum)->Glucose Oxygen saturation (daily maximum)->Blood urea nitrogen (BUN) Oxygen saturation (daily maximum)->Body temperaure (daily maximum) Oxygen saturation (daily maximum)->Heart rate (daily maximum) Oxygen saturation (daily maximum)->Body temperature (daily minimum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Oxygen saturation (daily maximum)->Red Cell Blood Distribution Width (RDW) Calcium Calcium Oxygen saturation (daily maximum)->Calcium Eosinophil % Eosinophil % Oxygen saturation (daily maximum)->Eosinophil % Oxygen saturation (daily maximum)->Urea Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Partial pressure of oxygen (Blood gas test) Age Age Oxygen saturation (daily maximum)->Age Ferritin Ferritin Oxygen saturation (daily maximum)->Ferritin Platelets Platelets Oxygen saturation (daily maximum)->Platelets Albumin Albumin Oxygen saturation (daily maximum)->Albumin Hemoglobin Hemoglobin Oxygen saturation (daily maximum)->Hemoglobin Mean corpuscular volume Mean corpuscular volume Oxygen saturation (daily maximum)->Mean corpuscular volume Oxygen saturation (daily maximum)->Heart rate (daily minimum) Lymphocyte % Lymphocyte % Oxygen saturation (daily maximum)->Lymphocyte % Prothrombin time (PT) Prothrombin time (PT) Oxygen saturation (daily maximum)->Prothrombin time (PT) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) Red blood cells Red blood cells Oxygen saturation (daily maximum)->Red blood cells International normalized ratio (INR) International normalized ratio (INR) Oxygen saturation (daily maximum)->International normalized ratio (INR) Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Oxygen saturation (daily maximum)->Diastolic blood pressure (daily minimum) Lymphocyte count Lymphocyte count Oxygen saturation (daily maximum)->Lymphocyte count Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Oxygen saturation (daily maximum)->Mean corpuscular hemoglobin concentration (MCHC) Oxygen saturation (daily maximum)->Diastolic blood pressure (daily maximum) Red Cell Blood Distribution Width (RDW)->Mean corpuscular hemoglobin concentration (MCHC) Calcium->Platelets Eosinophil %->Body temperaure (daily maximum) Eosinophil %->Albumin Eosinophil %->Red blood cells Eosinophil %->International normalized ratio (INR) Eosinophil %->Lymphocyte count Age->Blood urea nitrogen (BUN) Albumin->Calcium Hematocrit Hematocrit Hemoglobin->Hematocrit Lactate dehydrogenase (LDH)->Ferritin Red blood cells->Red Cell Blood Distribution Width (RDW) Red blood cells->Hemoglobin Red blood cells->Mean corpuscular volume Red blood cells->Hematocrit International normalized ratio (INR)->Prothrombin time (PT) Lymphocyte count->Glucose Lymphocyte count->Lymphocyte % Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum)->Heart rate (daily maximum) Oxygen saturation (daily minimum)->Body temperature (daily minimum) Oxygen saturation (daily minimum)->Oxygen saturation (daily maximum) Oxygen saturation (daily minimum)->Eosinophil % Oxygen saturation (daily minimum)->Systolic blood pressure (daily maximum) Oxygen saturation (daily minimum)->Age Oxygen saturation (daily minimum)->Diastolic blood pressure (daily minimum) Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Body temperature (daily minimum) Death->Oxygen saturation (daily maximum) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Eosinophil % Death->Urea Death->Systolic blood pressure (daily maximum) Death->Partial pressure of oxygen (Blood gas test) Death->Age Death->Ferritin Death->Platelets Death->Albumin Death->Hemoglobin Death->Mean corpuscular volume Death->Heart rate (daily minimum) Death->Lymphocyte % Death->Prothrombin time (PT) Death->Lactate dehydrogenase (LDH) Death->Red blood cells Death->International normalized ratio (INR) Death->Diastolic blood pressure (daily minimum) Death->Lymphocyte count Death->Oxygen saturation (daily minimum) Death->Hematocrit Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 2
max_indegree 0
max_iters 0
mi_nneighbors 200
mi_thres 0.0
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8590092589799312
In [22]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8350205181195418 auc.
Metric Value
roc 0.844832
brier_loss 0.143931
specificity0.898649
sensitivity0.473684
accuracy 0.830028
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Albumin Albumin Blood urea nitrogen (BUN)->Albumin Body temperaure (daily maximum) Body temperaure (daily maximum) Body temperaure (daily maximum)->Glucose Heart rate (daily maximum) Heart rate (daily maximum) Body temperaure (daily maximum)->Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) D-Dimer D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Systolic blood pressure (first measure) Systolic blood pressure (first measure) Systolic blood pressure (daily maximum)->Systolic blood pressure (first measure) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Segmented neutrophils % Segmented neutrophils % Age Age Age->Glucose Age->Blood urea nitrogen (BUN) Age->Body temperaure (daily maximum) Age->Red Cell Blood Distribution Width (RDW) Age->Urea Age->Systolic blood pressure (daily maximum) Age->Albumin International normalized ratio (INR) International normalized ratio (INR) Age->International normalized ratio (INR) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Age->Diastolic blood pressure (daily maximum) Total CO2 (blood gas test) Total CO2 (blood gas test) Platelets Platelets Albumin->Partial pressure of CO2 (Blood gas test) Albumin->Red Cell Blood Distribution Width (RDW) Albumin->International normalized ratio (INR) Albumin->Diastolic blood pressure (daily maximum) Mean corpuscular volume Mean corpuscular volume C-reactive protein C-reactive protein Lymphocyte % Lymphocyte % Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Monocytes % Monocytes % Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Hematocrit Hematocrit Diastolic blood pressure (daily maximum)->Body temperaure (daily maximum) Diastolic blood pressure (daily maximum)->Heart rate (daily maximum) Diastolic blood pressure (daily maximum)->Partial pressure of CO2 (Blood gas test) Diastolic blood pressure (daily maximum)->Systolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum)->Systolic blood pressure (first measure) Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Partial pressure of CO2 (Blood gas test) Death->Red Cell Blood Distribution Width (RDW) Death->Urea Death->Systolic blood pressure (daily maximum) Death->Age Death->Albumin Death->International normalized ratio (INR) Death->Systolic blood pressure (first measure) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 2
max_indegree 0
max_iters 0
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8350205181195418
In [23]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8100308596877793 auc.
Metric Value
roc 0.811542
brier_loss 0.127926
specificity0.925581
sensitivity0.333333
accuracy 0.859504
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Body temperaure (daily maximum)->Heart rate (daily maximum) Heart rate (daily minimum) Heart rate (daily minimum) Body temperaure (daily maximum)->Heart rate (daily minimum) Lymphocyte count Lymphocyte count Body temperaure (daily maximum)->Lymphocyte count Heart rate (first measure) Heart rate (first measure) Heart rate (daily maximum)->Heart rate (first measure) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Calcium Calcium Red Cell Blood Distribution Width (RDW)->Calcium Calcium->Body temperaure (daily maximum) Albumin Albumin Calcium->Albumin Calcium->Heart rate (daily minimum) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) D-Dimer D-Dimer Age Age Age->Blood urea nitrogen (BUN) Age->Red Cell Blood Distribution Width (RDW) Age->Calcium Age->Albumin Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Age->Oxygen saturation (daily minimum) Albumin->Blood urea nitrogen (BUN) Albumin->Body temperaure (daily maximum) Heart rate (daily minimum)->Oxygen saturation (daily minimum) Lymphocyte % Lymphocyte % Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Lymphocyte count->Partial Thromboplastin Time ratio Monocytes % Monocytes % Oxygen saturation (daily minimum)->Heart rate (daily maximum) Oxygen saturation (daily minimum)->Partial Thromboplastin Time ratio Oxygen saturation (daily minimum)->Heart rate (first measure) Oxygen saturation (daily minimum)->Lymphocyte count Death Death Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Partial Thromboplastin Time ratio Death->Age Death->Albumin Death->Heart rate (daily minimum) Death->Heart rate (first measure) Death->Lymphocyte count Death->Oxygen saturation (daily minimum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 2
max_indegree 0
max_iters 0
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8100308596877793
In [24]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.7798885361552028 auc.
Metric Value
roc 0.764277
brier_loss 0.146185
specificity0.916667
sensitivity0.181818
accuracy 0.825843
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Heart rate (daily maximum) Heart rate (daily maximum) Derived fibrinogen Derived fibrinogen Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Eosinophil % Eosinophil % Oxygen saturation (first measure) Oxygen saturation (first measure) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Oxygen saturation (first measure)->Partial Thromboplastin Time ratio Creatinine Creatinine Oxygen saturation (first measure)->Creatinine Age Age Oxygen saturation (first measure)->Age Heart rate (first measure) Heart rate (first measure) Oxygen saturation (first measure)->Heart rate (first measure) Systolic blood pressure (daily minimum) Systolic blood pressure (daily minimum) Oxygen saturation (first measure)->Systolic blood pressure (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (first measure)->Oxygen saturation (daily minimum) Urea Urea D-Dimer D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Creatinine->Blood urea nitrogen (BUN) Segmented neutrophils % Segmented neutrophils % Age->Blood urea nitrogen (BUN) Age->Partial Thromboplastin Time ratio Age->Creatinine Age->Oxygen saturation (daily minimum) Hemoglobin Hemoglobin Mean corpuscular volume Mean corpuscular volume C-reactive protein C-reactive protein Lymphocyte % Lymphocyte % Prothrombin time (PT) Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Hemolysis index Hemolysis index Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Monocytes % Monocytes % Oxygen saturation (daily minimum)->Heart rate (first measure) Oxygen saturation (daily minimum)->Systolic blood pressure (daily minimum) Hematocrit Hematocrit Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Systolic blood pressure (first measure) Systolic blood pressure (first measure) Death Death Death->Blood urea nitrogen (BUN) Death->Oxygen saturation (first measure) Death->Partial Thromboplastin Time ratio Death->Creatinine Death->Age Death->Heart rate (first measure) Death->Systolic blood pressure (daily minimum) Death->Oxygen saturation (daily minimum)
Hyperparameter Value
bn_score bic
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 2
max_indegree 0
max_iters 0
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.7798885361552028

Unconstrained BNC CLG¶

In [12]:
model = spbn.HCClassifierCLG(
    random_state=SEED, n_jobs=-1, bn_score='bic'
)
nombre_modelo = 'Unconstrained-BNC-CLG'
search_space = {
    'epsilon': Real(1e-6, 1, prior='log-uniform', base=20),
    'max_indegree': Integer(0, 5),
}

Ola 1¶

In [14]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=50, verbose=0)
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
Tune result: 0.8530678709129216 auc.
Metric Value
roc 0.838101
brier_loss 0.188457
specificity0.865854
sensitivity0.581818
accuracy 0.77809
Death Death Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Death->Red Cell Blood Distribution Width (RDW) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Death->Blood urea nitrogen (BUN) Calcium Calcium Death->Calcium Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Death->Partial pressure of oxygen (Blood gas test) International normalized ratio (INR) International normalized ratio (INR) Death->International normalized ratio (INR) Heart rate (daily maximum) Heart rate (daily maximum) Death->Heart rate (daily maximum) Age Age Death->Age Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Death->Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Death->Oxygen saturation (daily maximum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Death->Oxygen saturation (daily minimum) Glucose Glucose Death->Glucose Body temperature (daily minimum) Body temperature (daily minimum) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Red Cell Blood Distribution Width (RDW)->Mean corpuscular hemoglobin concentration (MCHC) Hemoglobin Hemoglobin Red Cell Blood Distribution Width (RDW)->Hemoglobin Platelets Platelets Urea Urea Blood urea nitrogen (BUN)->Urea Lymphocyte % Lymphocyte % Blood urea nitrogen (BUN)->Lymphocyte % Lymphocyte count Lymphocyte count Calcium->Platelets Albumin Albumin Calcium->Albumin Hematocrit Hematocrit Red blood cells Red blood cells Hematocrit->Red blood cells Lymphocyte %->Lymphocyte count Body temperaure (daily maximum) Body temperaure (daily maximum) Eosinophil % Eosinophil % Body temperaure (daily maximum)->Eosinophil % Mean corpuscular volume Mean corpuscular volume Ferritin Ferritin Diastolic blood pressure (daily maximum)->Systolic blood pressure (daily maximum) Prothrombin time (PT) Prothrombin time (PT) International normalized ratio (INR)->Prothrombin time (PT) Heart rate (daily maximum)->Body temperaure (daily maximum) Red blood cells->Mean corpuscular volume Lactate dehydrogenase (LDH)->Ferritin Hemoglobin->Hematocrit Heart rate (daily minimum) Heart rate (daily minimum) Heart rate (daily minimum)->Body temperature (daily minimum) Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Heart rate (daily minimum)->Diastolic blood pressure (daily minimum) Oxygen saturation (daily maximum)->Heart rate (daily minimum)
Hyperparameter Value
bn_score bic
epsilon 1.0000000000000002e-06
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 1
max_iters 2147483647
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8530678709129216

Ola 2¶

In [16]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=50, verbose=0)
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
Tune result: 0.8276863756662045 auc.
Metric Value
roc 0.815226
brier_loss 0.159931
specificity0.902027
sensitivity0.403509
accuracy 0.82153
Death Death Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Death->Red Cell Blood Distribution Width (RDW) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Death->Blood urea nitrogen (BUN) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Death->Partial pressure of oxygen (Blood gas test) D-Dimer D-Dimer Death->D-Dimer Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Death->Current bicarbonate (blood gas test) International normalized ratio (INR) International normalized ratio (INR) Death->International normalized ratio (INR) Heart rate (daily maximum) Heart rate (daily maximum) Death->Heart rate (daily maximum) Age Age Death->Age Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Death->Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Death->Oxygen saturation (daily maximum) Segmented neutrophils % Segmented neutrophils % Monocytes % Monocytes % Segmented neutrophils %->Monocytes % Lymphocyte % Lymphocyte % Segmented neutrophils %->Lymphocyte % Glucose Glucose Segmented neutrophils %->Glucose Body temperature (daily minimum) Body temperature (daily minimum) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Body temperature (daily minimum)->Oxygen saturation (daily minimum) Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi->Monocytes % Platelets Platelets Estimated glomerular filtration rate (eGFR) ckd-epi->Platelets Estimated glomerular filtration rate (eGFR) ckd-epi->Blood urea nitrogen (BUN) Urea Urea Estimated glomerular filtration rate (eGFR) ckd-epi->Urea Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Systolic blood pressure (first measure) Systolic blood pressure (first measure) Systolic blood pressure (daily maximum)->Systolic blood pressure (first measure) Systolic blood pressure (daily maximum)->Diastolic blood pressure (daily maximum) Systolic blood pressure (daily maximum)->Oxygen saturation (daily minimum) Monocytes %->Systolic blood pressure (daily maximum) Monocytes %->Lymphocyte % Hematocrit Hematocrit Red Cell Blood Distribution Width (RDW)->Hematocrit Mean corpuscular volume Mean corpuscular volume Red Cell Blood Distribution Width (RDW)->Mean corpuscular volume Red Cell Blood Distribution Width (RDW)->Age Body temperaure (daily maximum) Body temperaure (daily maximum) Platelets->Body temperaure (daily maximum) Blood urea nitrogen (BUN)->Urea Blood urea nitrogen (BUN)->Hematocrit Blood urea nitrogen (BUN)->D-Dimer Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Blood urea nitrogen (BUN)->Partial pressure of CO2 (Blood gas test) Urea->Segmented neutrophils % Urea->Current bicarbonate (blood gas test) Urea->Mean corpuscular volume Hematocrit->Platelets Albumin Albumin Hematocrit->Albumin C-reactive protein C-reactive protein Hematocrit->C-reactive protein Hematocrit->Oxygen saturation (daily maximum) Total CO2 (blood gas test) Total CO2 (blood gas test) Current bicarbonate (blood gas test)->Total CO2 (blood gas test) Lymphocyte %->Albumin Total CO2 (blood gas test)->Partial pressure of CO2 (Blood gas test) Diastolic blood pressure (daily maximum)->Systolic blood pressure (first measure) Diastolic blood pressure (daily maximum)->Heart rate (daily maximum) Heart rate (daily maximum)->Body temperaure (daily maximum) Partial pressure of CO2 (Blood gas test)->Partial pressure of oxygen (Blood gas test) Partial pressure of CO2 (Blood gas test)->Lactate dehydrogenase (LDH) Age->Estimated glomerular filtration rate (eGFR) ckd-epi Age->Systolic blood pressure (daily maximum) Age->International normalized ratio (INR) Age->Glucose Lactate dehydrogenase (LDH)->C-reactive protein C-reactive protein->Segmented neutrophils % Oxygen saturation (daily maximum)->Body temperature (daily minimum)
Hyperparameter Value
bn_score bic
epsilon 1.0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 2
max_iters 2147483647
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8276863756662045

Olas 3,4, 5¶

In [17]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=50, verbose=0)
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
Tune result: 0.8148766489184498 auc.
Metric Value
roc 0.749182
brier_loss 0.126782
specificity0.939535
sensitivity0.259259
accuracy 0.863636
Death Death Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Death->Red Cell Blood Distribution Width (RDW) Lymphocyte count Lymphocyte count Death->Lymphocyte count Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Death->Blood urea nitrogen (BUN) D-Dimer D-Dimer Death->D-Dimer Heart rate (daily maximum) Heart rate (daily maximum) Death->Heart rate (daily maximum) Age Age Death->Age Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Death->Lactate dehydrogenase (LDH) Heart rate (daily minimum) Heart rate (daily minimum) Death->Heart rate (daily minimum) Heart rate (first measure) Heart rate (first measure) Monocytes % Monocytes % Red Cell Blood Distribution Width (RDW)->Monocytes % Lymphocyte count->Monocytes % Blood urea nitrogen (BUN)->Lymphocyte count Blood urea nitrogen (BUN)->D-Dimer Lymphocyte % Lymphocyte % Blood urea nitrogen (BUN)->Lymphocyte % Albumin Albumin Blood urea nitrogen (BUN)->Albumin Calcium Calcium Calcium->Albumin D-Dimer->Albumin Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio D-Dimer->Partial Thromboplastin Time ratio Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test)->Blood urea nitrogen (BUN) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Current bicarbonate (blood gas test)->Activated Partial Thromboplastin Time (aPTT) Lymphocyte %->Monocytes % Lymphocyte %->Lymphocyte count Lymphocyte %->Activated Partial Thromboplastin Time (aPTT) Body temperaure (daily maximum) Body temperaure (daily maximum) Body temperaure (daily maximum)->Lymphocyte count Body temperaure (daily maximum)->Calcium Albumin->Red Cell Blood Distribution Width (RDW) Albumin->Lymphocyte % Heart rate (daily maximum)->Heart rate (first measure) Heart rate (daily maximum)->Body temperaure (daily maximum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Heart rate (daily maximum)->Oxygen saturation (daily minimum) Age->Heart rate (first measure) Age->Monocytes % Age->Red Cell Blood Distribution Width (RDW) Age->Blood urea nitrogen (BUN) Age->Albumin Age->Lactate dehydrogenase (LDH) Age->Oxygen saturation (daily minimum) Lactate dehydrogenase (LDH)->Heart rate (first measure) Lactate dehydrogenase (LDH)->Calcium Lactate dehydrogenase (LDH)->Current bicarbonate (blood gas test) Lactate dehydrogenase (LDH)->Partial Thromboplastin Time ratio Heart rate (daily minimum)->Heart rate (first measure) Heart rate (daily minimum)->Body temperaure (daily maximum) Heart rate (daily minimum)->Lactate dehydrogenase (LDH) Heart rate (daily minimum)->Oxygen saturation (daily minimum) Oxygen saturation (daily minimum)->Lymphocyte % Partial Thromboplastin Time ratio->Activated Partial Thromboplastin Time (aPTT)
Hyperparameter Value
bn_score bic
epsilon 0.016566792669357525
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 4
max_iters 2147483647
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8148766489184498

Ola 6¶

In [18]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=50, verbose=0)
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
The objective has been evaluated at this point before.
Tune result: 0.725493919985148 auc.
Metric Value
roc 0.660548
brier_loss 0.160867
specificity0.923077
sensitivity0.0909091
accuracy 0.820225
Death Death Heart rate (first measure) Heart rate (first measure) Death->Heart rate (first measure) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Death->Blood urea nitrogen (BUN) Hemolysis index Hemolysis index Death->Hemolysis index Glucose Glucose Death->Glucose Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Death->Oxygen saturation (daily maximum) Segmented neutrophils % Segmented neutrophils % Monocytes % Monocytes % Segmented neutrophils %->Monocytes % Eosinophil % Eosinophil % Segmented neutrophils %->Eosinophil % Oxygen saturation (first measure) Oxygen saturation (first measure) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (first measure)->Oxygen saturation (daily minimum) Creatinine Creatinine Hemoglobin Hemoglobin Creatinine->Hemoglobin Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Systolic blood pressure (daily maximum)->Partial Thromboplastin Time ratio Heart rate (daily maximum) Heart rate (daily maximum) Heart rate (first measure)->Heart rate (daily maximum) Urea Urea Blood urea nitrogen (BUN)->Urea Age Age Blood urea nitrogen (BUN)->Age Urea->Creatinine Hematocrit Hematocrit Systolic blood pressure (daily minimum) Systolic blood pressure (daily minimum) D-Dimer D-Dimer Systolic blood pressure (first measure) Systolic blood pressure (first measure) Systolic blood pressure (first measure)->Systolic blood pressure (daily maximum) Systolic blood pressure (first measure)->Systolic blood pressure (daily minimum) Lymphocyte % Lymphocyte % Lymphocyte %->Segmented neutrophils % Mean corpuscular volume Mean corpuscular volume Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Age->Systolic blood pressure (first measure) Age->Mean corpuscular volume Prothrombin time (PT) Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH)->D-Dimer Derived fibrinogen Derived fibrinogen Lactate dehydrogenase (LDH)->Derived fibrinogen Hemoglobin->Hematocrit Hemoglobin->Mean corpuscular hemoglobin concentration (MCHC) C-reactive protein C-reactive protein Derived fibrinogen->C-reactive protein C-reactive protein->Lymphocyte % Partial Thromboplastin Time ratio->Activated Partial Thromboplastin Time (aPTT) Partial Thromboplastin Time ratio->Prothrombin time (PT) Oxygen saturation (daily maximum)->Oxygen saturation (first measure) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH)
Hyperparameter Value
bn_score bic
epsilon 0.0002904553940801573
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 1
max_iters 2147483647
n_jobs -1
num_folds 10
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.725493919985148

Naive-Bayes SP¶

In [32]:
model = spbn.KDBBNClassifierSP(k=0, random_state=SEED, n_jobs=-1, bn_score='holdout-lik', max_iters=1000)
nombre_modelo = 'Naive-Bayes-SP'
search_space = {
    'mi_thres': Categorical([0, 0.1]),
    'mi_nneighbors': Categorical([5, 10, 50, 100]),
    'bw_sel': Categorical(['scott', 'ucv']),
}
In [34]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8938889335377386 auc.
Metric Value
roc 0.863193
brier_loss 0.17123
specificity0.886179
sensitivity0.590909
accuracy 0.794944
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Calcium Calcium Eosinophil % Eosinophil % Urea Urea Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Age Age Ferritin Ferritin Platelets Platelets Albumin Albumin Hemoglobin Hemoglobin Mean corpuscular volume Mean corpuscular volume Heart rate (daily minimum) Heart rate (daily minimum) Lymphocyte % Lymphocyte % Prothrombin time (PT) Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Red blood cells Red blood cells International normalized ratio (INR) International normalized ratio (INR) Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Lymphocyte count Lymphocyte count Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Hematocrit Hematocrit Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Body temperature (daily minimum) Death->Oxygen saturation (daily maximum) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Eosinophil % Death->Urea Death->Systolic blood pressure (daily maximum) Death->Partial pressure of oxygen (Blood gas test) Death->Age Death->Ferritin Death->Platelets Death->Albumin Death->Hemoglobin Death->Mean corpuscular volume Death->Heart rate (daily minimum) Death->Lymphocyte % Death->Prothrombin time (PT) Death->Lactate dehydrogenase (LDH) Death->Red blood cells Death->International normalized ratio (INR) Death->Diastolic blood pressure (daily minimum) Death->Lymphocyte count Death->Oxygen saturation (daily minimum) Death->Hematocrit Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 1000
mi_nneighbors 50
mi_thres 0.0
n_jobs -1
num_folds 10
operators ('node_type',)
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8938889335377386
In [35]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8522595499399539 auc.
Metric Value
roc 0.856271
brier_loss 0.132122
specificity0.89527
sensitivity0.491228
accuracy 0.830028
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Urea Urea D-Dimer D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Segmented neutrophils % Segmented neutrophils % Age Age Total CO2 (blood gas test) Total CO2 (blood gas test) Platelets Platelets Albumin Albumin Mean corpuscular volume Mean corpuscular volume C-reactive protein C-reactive protein Lymphocyte % Lymphocyte % Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) International normalized ratio (INR) International normalized ratio (INR) Monocytes % Monocytes % Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Hematocrit Hematocrit Systolic blood pressure (first measure) Systolic blood pressure (first measure) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Death Death Death->Glucose Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Partial pressure of CO2 (Blood gas test) Death->Red Cell Blood Distribution Width (RDW) Death->Urea Death->Systolic blood pressure (daily maximum) Death->Age Death->Albumin Death->International normalized ratio (INR) Death->Systolic blood pressure (first measure) Death->Diastolic blood pressure (daily maximum)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 1000
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
operators ('node_type',)
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8522595499399539
In [36]:
report, data = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.848859498625713 auc.
Metric Value
roc 0.823256
brier_loss 0.0990414
specificity0.944186
sensitivity0.222222
accuracy 0.863636
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Calcium Calcium Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) D-Dimer D-Dimer Age Age Albumin Albumin Heart rate (daily minimum) Heart rate (daily minimum) Lymphocyte % Lymphocyte % Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Heart rate (first measure) Heart rate (first measure) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Lymphocyte count Lymphocyte count Monocytes % Monocytes % Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Death Death Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily maximum) Death->Red Cell Blood Distribution Width (RDW) Death->Calcium Death->Partial Thromboplastin Time ratio Death->Age Death->Albumin Death->Heart rate (daily minimum) Death->Heart rate (first measure) Death->Lymphocyte count Death->Oxygen saturation (daily minimum)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 1000
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
operators ('node_type',)
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.848859498625713
In [37]:
report, data = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.7896872551749745 auc.
Metric Value
roc 0.797786
brier_loss 0.119946
specificity0.948718
sensitivity0.272727
accuracy 0.865169
Glucose Glucose Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Heart rate (daily maximum) Heart rate (daily maximum) Derived fibrinogen Derived fibrinogen Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Eosinophil % Eosinophil % Oxygen saturation (first measure) Oxygen saturation (first measure) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Urea Urea D-Dimer D-Dimer Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Creatinine Creatinine Segmented neutrophils % Segmented neutrophils % Age Age Hemoglobin Hemoglobin Mean corpuscular volume Mean corpuscular volume C-reactive protein C-reactive protein Lymphocyte % Lymphocyte % Prothrombin time (PT) Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Heart rate (first measure) Heart rate (first measure) Hemolysis index Hemolysis index Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Monocytes % Monocytes % Systolic blood pressure (daily minimum) Systolic blood pressure (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Hematocrit Hematocrit Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Systolic blood pressure (first measure) Systolic blood pressure (first measure) Death Death Death->Blood urea nitrogen (BUN) Death->Oxygen saturation (first measure) Death->Partial Thromboplastin Time ratio Death->Creatinine Death->Age Death->Heart rate (first measure) Death->Systolic blood pressure (daily minimum) Death->Oxygen saturation (daily minimum)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 0
max_indegree 0
max_iters 1000
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
operators ('node_type',)
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.7896872551749745

Chow-Liu TAN SP¶

In [12]:
model = spbn.CLTANBNClassifierSP(random_state=SEED, n_jobs=-1, bn_score='holdout-lik', max_iters=1000)
nombre_modelo = 'Chow-Liu-TAN-SP'
search_space = {
    'mi_thres': Categorical([0, 0.1]),
    'mi_nneighbors': Categorical([5, 10, 50, 100]),
    'bw_sel': Categorical(['scott', 'ucv']),
}
In [14]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8809358187449435 auc.
Metric Value
roc 0.85765
brier_loss 0.171022
specificity0.902439
sensitivity0.563636
accuracy 0.797753
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Red blood cells Red blood cells Hematocrit Hematocrit Red blood cells->Hematocrit Mean corpuscular volume Mean corpuscular volume Red blood cells->Mean corpuscular volume Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum)->Red blood cells Heart rate (daily minimum) Heart rate (daily minimum) Oxygen saturation (daily maximum)->Heart rate (daily minimum) Ferritin Ferritin Oxygen saturation (daily maximum)->Ferritin Platelets Platelets Oxygen saturation (daily maximum)->Platelets Age Age Oxygen saturation (daily maximum)->Age Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Oxygen saturation (daily maximum)->Diastolic blood pressure (daily minimum) Calcium Calcium Oxygen saturation (daily maximum)->Calcium Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily maximum)->Heart rate (daily maximum) Albumin Albumin Oxygen saturation (daily maximum)->Albumin International normalized ratio (INR) International normalized ratio (INR) Oxygen saturation (daily maximum)->International normalized ratio (INR) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Oxygen saturation (daily maximum)->Mean corpuscular hemoglobin concentration (MCHC) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Oxygen saturation (daily maximum)->Red Cell Blood Distribution Width (RDW) Glucose Glucose Oxygen saturation (daily maximum)->Glucose Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Oxygen saturation (daily maximum)->Partial pressure of oxygen (Blood gas test) Urea->Oxygen saturation (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Heart rate (daily minimum)->Body temperature (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Body temperature (daily minimum)->Oxygen saturation (daily minimum) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Lymphocyte count Lymphocyte count Body temperature (daily minimum)->Lymphocyte count Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum)->Systolic blood pressure (daily maximum) Eosinophil % Eosinophil % Diastolic blood pressure (daily maximum)->Eosinophil % Prothrombin time (PT) Prothrombin time (PT) International normalized ratio (INR)->Prothrombin time (PT) Hemoglobin Hemoglobin Hematocrit->Hemoglobin Body temperaure (daily maximum) Body temperaure (daily maximum) Eosinophil %->Body temperaure (daily maximum) Lymphocyte % Lymphocyte % Lymphocyte count->Lymphocyte % Death Death Death->Blood urea nitrogen (BUN) Death->Red blood cells Death->Oxygen saturation (daily maximum) Death->Urea Death->Heart rate (daily minimum) Death->Ferritin Death->Platelets Death->Oxygen saturation (daily minimum) Death->Body temperature (daily minimum) Death->Age Death->Diastolic blood pressure (daily minimum) Death->Calcium Death->Diastolic blood pressure (daily maximum) Death->Heart rate (daily maximum) Death->Albumin Death->Systolic blood pressure (daily maximum) Death->International normalized ratio (INR) Death->Prothrombin time (PT) Death->Lactate dehydrogenase (LDH) Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Red Cell Blood Distribution Width (RDW) Death->Hemoglobin Death->Hematocrit Death->Body temperaure (daily maximum) Death->Eosinophil % Death->Lymphocyte count Death->Lymphocyte % Death->Glucose Death->Mean corpuscular volume Death->Partial pressure of oxygen (Blood gas test)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 1000
mi_nneighbors 10
mi_thres 0.0
n_jobs -1
num_folds 10
operators node_type
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8809358187449435
In [15]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8347171440386129 auc.
Metric Value
roc 0.837038
brier_loss 0.16004
specificity0.885135
sensitivity0.45614
accuracy 0.815864
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Urea->Estimated glomerular filtration rate (eGFR) ckd-epi C-reactive protein C-reactive protein Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum)->C-reactive protein Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily maximum)->Oxygen saturation (daily minimum) Platelets Platelets Oxygen saturation (daily maximum)->Platelets Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Diastolic blood pressure (daily maximum) D-Dimer D-Dimer Oxygen saturation (daily maximum)->D-Dimer Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Oxygen saturation (daily maximum)->Partial pressure of CO2 (Blood gas test) Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily maximum)->Heart rate (daily maximum) Albumin Albumin Oxygen saturation (daily maximum)->Albumin Systolic blood pressure (first measure) Systolic blood pressure (first measure) Oxygen saturation (daily maximum)->Systolic blood pressure (first measure) Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily maximum) International normalized ratio (INR) International normalized ratio (INR) Oxygen saturation (daily maximum)->International normalized ratio (INR) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Oxygen saturation (daily maximum)->Red Cell Blood Distribution Width (RDW) Body temperaure (daily maximum) Body temperaure (daily maximum) Oxygen saturation (daily maximum)->Body temperaure (daily maximum) Glucose Glucose Oxygen saturation (daily maximum)->Glucose Monocytes % Monocytes % Oxygen saturation (daily maximum)->Monocytes % Hematocrit Hematocrit Oxygen saturation (daily maximum)->Hematocrit Mean corpuscular volume Mean corpuscular volume Oxygen saturation (daily maximum)->Mean corpuscular volume Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily maximum)->Body temperature (daily minimum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Oxygen saturation (daily maximum)->Partial pressure of oxygen (Blood gas test) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Total CO2 (blood gas test) Total CO2 (blood gas test) Total CO2 (blood gas test)->Current bicarbonate (blood gas test) Age Age Age->Oxygen saturation (daily maximum) Estimated glomerular filtration rate (eGFR) ckd-epi->Age Partial pressure of CO2 (Blood gas test)->Total CO2 (blood gas test) Lymphocyte % Lymphocyte % Segmented neutrophils % Segmented neutrophils % Segmented neutrophils %->Lymphocyte % Monocytes %->Segmented neutrophils % Death Death Death->Blood urea nitrogen (BUN) Death->Urea Death->C-reactive protein Death->Oxygen saturation (daily maximum) Death->Oxygen saturation (daily minimum) Death->Current bicarbonate (blood gas test) Death->Total CO2 (blood gas test) Death->Platelets Death->Age Death->Estimated glomerular filtration rate (eGFR) ckd-epi Death->Diastolic blood pressure (daily maximum) Death->D-Dimer Death->Partial pressure of CO2 (Blood gas test) Death->Heart rate (daily maximum) Death->Albumin Death->Systolic blood pressure (first measure) Death->Systolic blood pressure (daily maximum) Death->International normalized ratio (INR) Death->Lactate dehydrogenase (LDH) Death->Red Cell Blood Distribution Width (RDW) Death->Body temperaure (daily maximum) Death->Lymphocyte % Death->Segmented neutrophils % Death->Glucose Death->Monocytes % Death->Hematocrit Death->Mean corpuscular volume Death->Body temperature (daily minimum) Death->Partial pressure of oxygen (Blood gas test)
Hyperparameter Value
bn_score holdout-lik
bw_sel ucv
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 1000
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
operators node_type
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8347171440386129
In [16]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.7836899802185575 auc.
Metric Value
roc 0.780879
brier_loss 0.111389
specificity0.95814
sensitivity0.222222
accuracy 0.876033
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Age Age Blood urea nitrogen (BUN)->Age Albumin Albumin Blood urea nitrogen (BUN)->Albumin Heart rate (daily minimum) Heart rate (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum)->Heart rate (daily minimum) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Oxygen saturation (daily minimum)->Current bicarbonate (blood gas test) Calcium Calcium Oxygen saturation (daily minimum)->Calcium Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily minimum)->Heart rate (daily maximum) D-Dimer D-Dimer Oxygen saturation (daily minimum)->D-Dimer Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Oxygen saturation (daily minimum)->Lactate dehydrogenase (LDH) Body temperaure (daily maximum) Body temperaure (daily maximum) Oxygen saturation (daily minimum)->Body temperaure (daily maximum) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Oxygen saturation (daily minimum)->Partial Thromboplastin Time ratio Lymphocyte count Lymphocyte count Oxygen saturation (daily minimum)->Lymphocyte count Age->Oxygen saturation (daily minimum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Age->Red Cell Blood Distribution Width (RDW) Heart rate (first measure) Heart rate (first measure) Heart rate (daily maximum)->Heart rate (first measure) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Partial Thromboplastin Time ratio->Activated Partial Thromboplastin Time (aPTT) Lymphocyte % Lymphocyte % Lymphocyte count->Lymphocyte % Monocytes % Monocytes % Lymphocyte count->Monocytes % Death Death Death->Blood urea nitrogen (BUN) Death->Heart rate (daily minimum) Death->Oxygen saturation (daily minimum) Death->Current bicarbonate (blood gas test) Death->Age Death->Calcium Death->Heart rate (first measure) Death->Heart rate (daily maximum) Death->D-Dimer Death->Albumin Death->Lactate dehydrogenase (LDH) Death->Red Cell Blood Distribution Width (RDW) Death->Body temperaure (daily maximum) Death->Activated Partial Thromboplastin Time (aPTT) Death->Partial Thromboplastin Time ratio Death->Lymphocyte count Death->Lymphocyte % Death->Monocytes %
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 1000
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
operators node_type
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.7836899802185575
In [17]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.7136411027568923 auc.
Metric Value
roc 0.741841
brier_loss 0.153588
specificity0.916667
sensitivity0.227273
accuracy 0.831461
Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Age Age Blood urea nitrogen (BUN)->Age Creatinine Creatinine Urea->Creatinine Derived fibrinogen Derived fibrinogen C-reactive protein C-reactive protein Derived fibrinogen->C-reactive protein Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum)->Derived fibrinogen Systolic blood pressure (daily minimum) Systolic blood pressure (daily minimum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily minimum) Oxygen saturation (first measure) Oxygen saturation (first measure) Oxygen saturation (daily maximum)->Oxygen saturation (first measure) Heart rate (first measure) Heart rate (first measure) Oxygen saturation (daily maximum)->Heart rate (first measure) D-Dimer D-Dimer Oxygen saturation (daily maximum)->D-Dimer Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily maximum)->Heart rate (daily maximum) Systolic blood pressure (first measure) Systolic blood pressure (first measure) Oxygen saturation (daily maximum)->Systolic blood pressure (first measure) Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily maximum) Prothrombin time (PT) Prothrombin time (PT) Oxygen saturation (daily maximum)->Prothrombin time (PT) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Oxygen saturation (daily maximum)->Mean corpuscular hemoglobin concentration (MCHC) Hemoglobin Hemoglobin Oxygen saturation (daily maximum)->Hemoglobin Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Oxygen saturation (daily maximum)->Activated Partial Thromboplastin Time (aPTT) Hemolysis index Hemolysis index Oxygen saturation (daily maximum)->Hemolysis index Eosinophil % Eosinophil % Oxygen saturation (daily maximum)->Eosinophil % Glucose Glucose Oxygen saturation (daily maximum)->Glucose Monocytes % Monocytes % Oxygen saturation (daily maximum)->Monocytes % Mean corpuscular volume Mean corpuscular volume Oxygen saturation (daily maximum)->Mean corpuscular volume Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (first measure)->Oxygen saturation (daily minimum) Age->Oxygen saturation (daily maximum) Hematocrit Hematocrit Hemoglobin->Hematocrit Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Activated Partial Thromboplastin Time (aPTT)->Partial Thromboplastin Time ratio Lymphocyte % Lymphocyte % Segmented neutrophils % Segmented neutrophils % Segmented neutrophils %->Lymphocyte % Monocytes %->Segmented neutrophils % Death Death Death->Blood urea nitrogen (BUN) Death->Urea Death->Derived fibrinogen Death->Oxygen saturation (daily maximum) Death->Systolic blood pressure (daily minimum) Death->C-reactive protein Death->Oxygen saturation (daily minimum) Death->Oxygen saturation (first measure) Death->Creatinine Death->Age Death->Heart rate (first measure) Death->D-Dimer Death->Heart rate (daily maximum) Death->Systolic blood pressure (first measure) Death->Systolic blood pressure (daily maximum) Death->Prothrombin time (PT) Death->Lactate dehydrogenase (LDH) Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Hemoglobin Death->Activated Partial Thromboplastin Time (aPTT) Death->Hemolysis index Death->Lymphocyte % Death->Segmented neutrophils % Death->Eosinophil % Death->Glucose Death->Partial Thromboplastin Time ratio Death->Monocytes % Death->Hematocrit Death->Mean corpuscular volume
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 0
max_iters 1000
mi_nneighbors 100
mi_thres 0.0
n_jobs -1
num_folds 10
operators node_type
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.7136411027568923

2DB SP¶

In [12]:
model = spbn.KDBBNClassifierSP(k=2, random_state=SEED, n_jobs=-1, bn_score='holdout-lik', max_iters=500)
nombre_modelo = '2DB-BNC-SP'
search_space = {
    'mi_thres': Categorical([0, 0.1]),
    'mi_nneighbors': Categorical([5, 10, 50, 100]),
    'bw_sel': Categorical(['scott', 'normal_reference']),
}
In [13]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8759334490170628 auc.
Metric Value
roc 0.827384
brier_loss 0.196265
specificity0.890244
sensitivity0.527273
accuracy 0.77809
Prothrombin time (PT) Prothrombin time (PT) International normalized ratio (INR) International normalized ratio (INR) Prothrombin time (PT)->International normalized ratio (INR) Body temperature (daily minimum) Body temperature (daily minimum) Prothrombin time (PT)->Body temperature (daily minimum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Prothrombin time (PT)->Oxygen saturation (daily maximum) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH)->Prothrombin time (PT) Platelets Platelets Lactate dehydrogenase (LDH)->Platelets Ferritin Ferritin Lactate dehydrogenase (LDH)->Ferritin Hematocrit Hematocrit Lactate dehydrogenase (LDH)->Hematocrit Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (daily minimum) Heart rate (daily minimum) Glucose Glucose Calcium Calcium Albumin Albumin Calcium->Albumin Mean corpuscular volume Mean corpuscular volume Mean corpuscular volume->Ferritin Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular volume->Mean corpuscular hemoglobin concentration (MCHC) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Platelets->Mean corpuscular volume Platelets->Hematocrit Lymphocyte count Lymphocyte count Lymphocyte count->Body temperaure (daily maximum) Lymphocyte count->Calcium Eosinophil % Eosinophil % Eosinophil %->Body temperaure (daily maximum) Eosinophil %->Calcium Ferritin->Mean corpuscular hemoglobin concentration (MCHC) Lymphocyte % Lymphocyte % Lymphocyte %->Prothrombin time (PT) Lymphocyte %->Lactate dehydrogenase (LDH) Lymphocyte %->Platelets Lymphocyte %->Lymphocyte count Red blood cells Red blood cells Hemoglobin Hemoglobin Red blood cells->Hemoglobin Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum)->Heart rate (daily maximum) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum)->Systolic blood pressure (daily maximum) Diastolic blood pressure (daily minimum)->Heart rate (daily minimum) Diastolic blood pressure (daily minimum)->Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily minimum)->Eosinophil % Diastolic blood pressure (daily minimum)->Oxygen saturation (daily minimum) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Age Age Age->Systolic blood pressure (daily maximum) Age->Blood urea nitrogen (BUN) Age->Lymphocyte count Age->Eosinophil % Age->Red blood cells Age->Oxygen saturation (daily minimum) Age->Diastolic blood pressure (daily minimum) Age->Red Cell Blood Distribution Width (RDW) Urea Urea Age->Urea Body temperature (daily minimum)->Heart rate (daily maximum) Body temperature (daily minimum)->Heart rate (daily minimum) Body temperature (daily minimum)->Glucose Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Body temperature (daily minimum)->Albumin Body temperature (daily minimum)->Partial pressure of oxygen (Blood gas test) Body temperature (daily minimum)->Diastolic blood pressure (daily minimum) Body temperature (daily minimum)->Age Hematocrit->Mean corpuscular volume Hematocrit->Red blood cells Hematocrit->Oxygen saturation (daily maximum) Hematocrit->Hemoglobin Oxygen saturation (daily maximum)->International normalized ratio (INR) Oxygen saturation (daily maximum)->Glucose Oxygen saturation (daily maximum)->Partial pressure of oxygen (Blood gas test) Oxygen saturation (daily maximum)->Urea Mean corpuscular hemoglobin concentration (MCHC)->Red Cell Blood Distribution Width (RDW) Mean corpuscular hemoglobin concentration (MCHC)->Age Mean corpuscular hemoglobin concentration (MCHC)->Body temperature (daily minimum) Urea->Blood urea nitrogen (BUN) Death Death Death->Prothrombin time (PT) Death->Lactate dehydrogenase (LDH) Death->Systolic blood pressure (daily maximum) Death->Heart rate (daily maximum) Death->Blood urea nitrogen (BUN) Death->International normalized ratio (INR) Death->Body temperaure (daily maximum) Death->Heart rate (daily minimum) Death->Glucose Death->Calcium Death->Mean corpuscular volume Death->Diastolic blood pressure (daily maximum) Death->Platelets Death->Lymphocyte count Death->Eosinophil % Death->Albumin Death->Ferritin Death->Lymphocyte % Death->Red blood cells Death->Oxygen saturation (daily minimum) Death->Partial pressure of oxygen (Blood gas test) Death->Diastolic blood pressure (daily minimum) Death->Red Cell Blood Distribution Width (RDW) Death->Age Death->Body temperature (daily minimum) Death->Hematocrit Death->Oxygen saturation (daily maximum) Death->Mean corpuscular hemoglobin concentration (MCHC) Death->Urea Death->Hemoglobin
Hyperparameter Value
bn_score holdout-lik
bw_sel normal_reference
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 2
max_indegree 0
max_iters 500
mi_nneighbors 5
mi_thres 0.0
n_jobs -1
num_folds 10
operators ('node_type',)
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8759334490170628
In [14]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.825951000547996 auc.
Metric Value
roc 0.848032
brier_loss 0.122064
specificity0.945946
sensitivity0.368421
accuracy 0.852691
C-reactive protein C-reactive protein Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Heart rate (daily maximum) Heart rate (daily maximum) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Urea Urea Blood urea nitrogen (BUN)->Urea Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) International normalized ratio (INR) International normalized ratio (INR) Body temperaure (daily maximum) Body temperaure (daily maximum) Glucose Glucose Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Mean corpuscular volume Mean corpuscular volume Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum)->Heart rate (daily maximum) Segmented neutrophils % Segmented neutrophils % Platelets Platelets Albumin Albumin Systolic blood pressure (first measure) Systolic blood pressure (first measure) Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Lymphocyte % Lymphocyte % Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum)->Body temperaure (daily maximum) Oxygen saturation (daily minimum)->Partial pressure of CO2 (Blood gas test) Oxygen saturation (daily minimum)->Diastolic blood pressure (daily maximum) Oxygen saturation (daily minimum)->Albumin Age Age Oxygen saturation (daily minimum)->Age Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily minimum)->Body temperature (daily minimum) Total CO2 (blood gas test) Total CO2 (blood gas test) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Monocytes % Monocytes % Age->Blood urea nitrogen (BUN) Age->Red Cell Blood Distribution Width (RDW) Age->Urea Body temperature (daily minimum)->Heart rate (daily maximum) Body temperature (daily minimum)->Blood urea nitrogen (BUN) Body temperature (daily minimum)->Body temperaure (daily maximum) Body temperature (daily minimum)->Partial pressure of CO2 (Blood gas test) Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Body temperature (daily minimum)->Albumin Body temperature (daily minimum)->Red Cell Blood Distribution Width (RDW) Body temperature (daily minimum)->Age Hematocrit Hematocrit Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) D-Dimer D-Dimer Death Death Death->Heart rate (daily maximum) Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Partial pressure of CO2 (Blood gas test) Death->Diastolic blood pressure (daily maximum) Death->Albumin Death->Oxygen saturation (daily minimum) Death->Red Cell Blood Distribution Width (RDW) Death->Age Death->Body temperature (daily minimum) Death->Urea
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 2
max_indegree 0
max_iters 500
mi_nneighbors 100
mi_thres 0.1
n_jobs -1
num_folds 10
operators ('node_type',)
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.825951000547996
In [15]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8281660801661153 auc.
Metric Value
roc 0.821189
brier_loss 0.105746
specificity0.95814
sensitivity0.222222
accuracy 0.876033
Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Heart rate (daily maximum) Heart rate (daily maximum) Heart rate (first measure) Heart rate (first measure) Heart rate (daily maximum)->Heart rate (first measure) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Body temperaure (daily maximum) Body temperaure (daily maximum) Body temperaure (daily maximum)->Heart rate (daily maximum) Heart rate (daily minimum) Heart rate (daily minimum) Body temperaure (daily maximum)->Heart rate (daily minimum) Lymphocyte count Lymphocyte count Body temperaure (daily maximum)->Lymphocyte count Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Heart rate (daily minimum)->Oxygen saturation (daily minimum) Calcium Calcium Calcium->Body temperaure (daily maximum) Calcium->Heart rate (daily minimum) Albumin Albumin Calcium->Albumin Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Lymphocyte count->Partial Thromboplastin Time ratio Albumin->Blood urea nitrogen (BUN) Albumin->Body temperaure (daily maximum) Lymphocyte % Lymphocyte % Oxygen saturation (daily minimum)->Heart rate (daily maximum) Oxygen saturation (daily minimum)->Lymphocyte count Oxygen saturation (daily minimum)->Partial Thromboplastin Time ratio Oxygen saturation (daily minimum)->Heart rate (first measure) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW)->Calcium Monocytes % Monocytes % Age Age Age->Blood urea nitrogen (BUN) Age->Calcium Age->Albumin Age->Oxygen saturation (daily minimum) Age->Red Cell Blood Distribution Width (RDW) D-Dimer D-Dimer Death Death Death->Heart rate (daily maximum) Death->Blood urea nitrogen (BUN) Death->Body temperaure (daily maximum) Death->Heart rate (daily minimum) Death->Calcium Death->Lymphocyte count Death->Partial Thromboplastin Time ratio Death->Albumin Death->Heart rate (first measure) Death->Oxygen saturation (daily minimum) Death->Red Cell Blood Distribution Width (RDW) Death->Age
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
k 2
max_indegree 0
max_iters 500
mi_nneighbors 50
mi_thres 0.1
n_jobs -1
num_folds 10
operators ('node_type',)
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8281660801661153

Unconstrained BNC SP¶

In [10]:
model = spbn.HCClassifierSP(
    random_state=SEED, n_jobs=-1, bn_score='holdout-lik', bw_sel='scott'
)
nombre_modelo = 'Unconstrained-BNC-SP'
search_space = {
    'epsilon': Real(1e-6, 1, prior='log-uniform', base=20),
    'max_indegree': Integer(1, 3),
}

Ola 1¶

In [11]:
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params epsilon, max_indegree
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8778172357077461 auc.
Metric Value
roc 0.850887
brier_loss 0.155774
specificity0.943089
sensitivity0.427273
accuracy 0.783708
Death Death Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Death->Oxygen saturation (daily minimum) Age Age Death->Age Calcium Calcium Death->Calcium Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Death->Oxygen saturation (daily maximum) Prothrombin time (PT) Prothrombin time (PT) Death->Prothrombin time (PT) Lymphocyte % Lymphocyte % Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Albumin Albumin Red Cell Blood Distribution Width (RDW)->Albumin Hemoglobin Hemoglobin Red Cell Blood Distribution Width (RDW)->Hemoglobin Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Diastolic blood pressure (daily minimum) Diastolic blood pressure (daily minimum) Systolic blood pressure (daily maximum)->Diastolic blood pressure (daily minimum) Hematocrit Hematocrit Hemoglobin->Hematocrit Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Age->Systolic blood pressure (daily maximum) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Age->Blood urea nitrogen (BUN) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Age->Mean corpuscular hemoglobin concentration (MCHC) Ferritin Ferritin Age->Ferritin Heart rate (daily maximum) Heart rate (daily maximum) Age->Heart rate (daily maximum) Lymphocyte count Lymphocyte count Age->Lymphocyte count Urea Urea Blood urea nitrogen (BUN)->Urea Glucose Glucose Blood urea nitrogen (BUN)->Glucose Red blood cells Red blood cells Hematocrit->Red blood cells Body temperaure (daily maximum) Body temperaure (daily maximum) Platelets Platelets Body temperaure (daily maximum)->Platelets Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily maximum)->Body temperature (daily minimum) Mean corpuscular hemoglobin concentration (MCHC)->Red Cell Blood Distribution Width (RDW) International normalized ratio (INR) International normalized ratio (INR) Mean corpuscular volume Mean corpuscular volume Ferritin->Lactate dehydrogenase (LDH) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Eosinophil % Eosinophil % Eosinophil %->Body temperaure (daily maximum) Lymphocyte count->Lymphocyte % Lymphocyte count->Partial pressure of oxygen (Blood gas test) Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Heart rate (daily minimum) Heart rate (daily minimum) Body temperature (daily minimum)->Heart rate (daily minimum) Red blood cells->Mean corpuscular volume Prothrombin time (PT)->International normalized ratio (INR)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0.5358557867652401
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 1
max_iters 2147483647
n_jobs -1
num_folds 10
operators ('arcs', 'node_type')
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8778172357077461

Ola 2¶

In [12]:
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params epsilon, max_indegree
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8354721548802896 auc.
Metric Value
roc 0.858819
brier_loss 0.103887
specificity0.956081
sensitivity0.368421
accuracy 0.86119
Death Death Total CO2 (blood gas test) Total CO2 (blood gas test) Death->Total CO2 (blood gas test) Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Death->Oxygen saturation (daily minimum) Albumin Albumin Death->Albumin Age Age Death->Age Urea Urea Death->Urea Heart rate (daily maximum) Heart rate (daily maximum) Death->Heart rate (daily maximum) Estimated glomerular filtration rate (eGFR) ckd-epi Estimated glomerular filtration rate (eGFR) ckd-epi Platelets Platelets Estimated glomerular filtration rate (eGFR) ckd-epi->Platelets Estimated glomerular filtration rate (eGFR) ckd-epi->Urea Lymphocyte % Lymphocyte % Lymphocyte %->Platelets Glucose Glucose Lymphocyte %->Glucose Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Total CO2 (blood gas test)->Current bicarbonate (blood gas test) Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Oxygen saturation (daily minimum)->Systolic blood pressure (daily maximum) Body temperaure (daily maximum) Body temperaure (daily maximum) Oxygen saturation (daily minimum)->Body temperaure (daily maximum) Body temperature (daily minimum) Body temperature (daily minimum) Oxygen saturation (daily minimum)->Body temperature (daily minimum) C-reactive protein C-reactive protein Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Hematocrit Hematocrit Red Cell Blood Distribution Width (RDW)->Hematocrit Albumin->Hematocrit Systolic blood pressure (daily maximum)->C-reactive protein Systolic blood pressure (first measure) Systolic blood pressure (first measure) Systolic blood pressure (daily maximum)->Systolic blood pressure (first measure) Diastolic blood pressure (daily maximum) Diastolic blood pressure (daily maximum) Systolic blood pressure (daily maximum)->Diastolic blood pressure (daily maximum) Systolic blood pressure (daily maximum)->Heart rate (daily maximum) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Partial pressure of CO2 (Blood gas test) Partial pressure of CO2 (Blood gas test) Lactate dehydrogenase (LDH)->Partial pressure of CO2 (Blood gas test) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test) Partial pressure of oxygen (Blood gas test)->Partial pressure of CO2 (Blood gas test) Age->Estimated glomerular filtration rate (eGFR) ckd-epi Age->Red Cell Blood Distribution Width (RDW) Mean corpuscular volume Mean corpuscular volume Age->Mean corpuscular volume Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum)->Body temperature (daily minimum) International normalized ratio (INR) International normalized ratio (INR) International normalized ratio (INR)->Age Monocytes % Monocytes % International normalized ratio (INR)->Monocytes % Mean corpuscular volume->Red Cell Blood Distribution Width (RDW) D-Dimer D-Dimer D-Dimer->Albumin Systolic blood pressure (first measure)->Mean corpuscular volume Diastolic blood pressure (daily maximum)->Systolic blood pressure (first measure) Urea->Blood urea nitrogen (BUN) Segmented neutrophils % Segmented neutrophils % Urea->Segmented neutrophils % Partial pressure of CO2 (Blood gas test)->Total CO2 (blood gas test) Partial pressure of CO2 (Blood gas test)->Current bicarbonate (blood gas test) Heart rate (daily maximum)->Body temperaure (daily maximum) Body temperature (daily minimum)->Diastolic blood pressure (daily maximum) Monocytes %->Estimated glomerular filtration rate (eGFR) ckd-epi Monocytes %->Lymphocyte % Monocytes %->C-reactive protein Monocytes %->Lactate dehydrogenase (LDH) Monocytes %->Segmented neutrophils % Segmented neutrophils %->Lymphocyte % Segmented neutrophils %->Partial pressure of oxygen (Blood gas test)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0.00023082848481274974
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 2
max_iters 2147483647
n_jobs -1
num_folds 10
operators ('arcs', 'node_type')
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8354721548802896

Olas 3,4, 5¶

In [13]:
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params epsilon, max_indegree
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.8064127100682388 auc.
Metric Value
roc 0.766236
brier_loss 0.0927182
specificity0.976744
sensitivity0.0740741
accuracy 0.876033
Death Death Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Death->Oxygen saturation (daily minimum) Current bicarbonate (blood gas test) Current bicarbonate (blood gas test) Death->Current bicarbonate (blood gas test) Age Age Death->Age Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Lymphocyte % Lymphocyte % Red Cell Blood Distribution Width (RDW) Red Cell Blood Distribution Width (RDW) Albumin Albumin Heart rate (daily maximum) Heart rate (daily maximum) Oxygen saturation (daily minimum)->Heart rate (daily maximum) Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Calcium Calcium Lactate dehydrogenase (LDH)->Calcium Age->Red Cell Blood Distribution Width (RDW) Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Age->Activated Partial Thromboplastin Time (aPTT) Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Age->Blood urea nitrogen (BUN) Monocytes % Monocytes % Age->Monocytes % Activated Partial Thromboplastin Time (aPTT)->Partial Thromboplastin Time ratio Blood urea nitrogen (BUN)->Albumin D-Dimer D-Dimer Blood urea nitrogen (BUN)->D-Dimer Body temperaure (daily maximum) Body temperaure (daily maximum) Heart rate (first measure) Heart rate (first measure) Heart rate (daily minimum) Heart rate (daily minimum) Heart rate (first measure)->Heart rate (daily minimum) Heart rate (daily maximum)->Heart rate (first measure) Lymphocyte count Lymphocyte count Lymphocyte count->Lymphocyte % Heart rate (daily minimum)->Body temperaure (daily maximum)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0.9954296486778969
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 1
max_iters 2147483647
n_jobs -1
num_folds 10
operators ('arcs', 'node_type')
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.8064127100682388

Ola 6¶

In [14]:
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
                                 model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
print(report['train_score'])
Tuning params epsilon, max_indegree
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Tune result: 0.7364977629258331 auc.
Metric Value
roc 0.727273
brier_loss 0.110971
specificity0.948718
sensitivity0.227273
accuracy 0.859551
Death Death Oxygen saturation (daily minimum) Oxygen saturation (daily minimum) Death->Oxygen saturation (daily minimum) Systolic blood pressure (daily maximum) Systolic blood pressure (daily maximum) Death->Systolic blood pressure (daily maximum) Hemoglobin Hemoglobin Death->Hemoglobin Systolic blood pressure (first measure) Systolic blood pressure (first measure) Death->Systolic blood pressure (first measure) Heart rate (daily maximum) Heart rate (daily maximum) Death->Heart rate (daily maximum) Partial Thromboplastin Time ratio Partial Thromboplastin Time ratio Activated Partial Thromboplastin Time (aPTT) Activated Partial Thromboplastin Time (aPTT) Partial Thromboplastin Time ratio->Activated Partial Thromboplastin Time (aPTT) Lymphocyte % Lymphocyte % Derived fibrinogen Derived fibrinogen Oxygen saturation (daily minimum)->Derived fibrinogen Age Age Oxygen saturation (daily minimum)->Age Oxygen saturation (first measure) Oxygen saturation (first measure) Oxygen saturation (daily minimum)->Oxygen saturation (first measure) Oxygen saturation (daily minimum)->Heart rate (daily maximum) Monocytes % Monocytes % Oxygen saturation (daily minimum)->Monocytes % Segmented neutrophils % Segmented neutrophils % Oxygen saturation (daily minimum)->Segmented neutrophils % C-reactive protein C-reactive protein C-reactive protein->Derived fibrinogen Systolic blood pressure (daily minimum) Systolic blood pressure (daily minimum) Systolic blood pressure (daily maximum)->Systolic blood pressure (daily minimum) Heart rate (first measure) Heart rate (first measure) Systolic blood pressure (daily maximum)->Heart rate (first measure) Hemoglobin->Partial Thromboplastin Time ratio D-Dimer D-Dimer Hemoglobin->D-Dimer Lactate dehydrogenase (LDH) Lactate dehydrogenase (LDH) Mean corpuscular hemoglobin concentration (MCHC) Mean corpuscular hemoglobin concentration (MCHC) Lactate dehydrogenase (LDH)->Mean corpuscular hemoglobin concentration (MCHC) Lactate dehydrogenase (LDH)->Heart rate (daily maximum) Age->Derived fibrinogen Mean corpuscular volume Mean corpuscular volume Age->Mean corpuscular volume Urea Urea Age->Urea Age->Monocytes % Hematocrit Hematocrit Age->Hematocrit Blood urea nitrogen (BUN) Blood urea nitrogen (BUN) Creatinine Creatinine Blood urea nitrogen (BUN)->Creatinine Systolic blood pressure (daily minimum)->Mean corpuscular hemoglobin concentration (MCHC) Systolic blood pressure (daily minimum)->Systolic blood pressure (first measure) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum) Oxygen saturation (daily maximum)->Systolic blood pressure (daily maximum) Oxygen saturation (daily maximum)->Lactate dehydrogenase (LDH) Oxygen saturation (daily maximum)->D-Dimer Creatinine->Mean corpuscular volume Creatinine->Hematocrit Mean corpuscular hemoglobin concentration (MCHC)->Hemoglobin Mean corpuscular volume->Partial Thromboplastin Time ratio Mean corpuscular volume->Mean corpuscular hemoglobin concentration (MCHC) Oxygen saturation (first measure)->D-Dimer Oxygen saturation (first measure)->Heart rate (first measure) Heart rate (first measure)->Systolic blood pressure (first measure) Heart rate (first measure)->Hematocrit Systolic blood pressure (first measure)->Age Eosinophil % Eosinophil % Eosinophil %->Lymphocyte % Eosinophil %->Lactate dehydrogenase (LDH) Eosinophil %->Monocytes % Eosinophil %->Segmented neutrophils % Hemolysis index Hemolysis index Hemolysis index->Lactate dehydrogenase (LDH) Urea->Blood urea nitrogen (BUN) Heart rate (daily maximum)->Heart rate (first measure) Monocytes %->Lymphocyte % Monocytes %->Segmented neutrophils % Glucose Glucose Monocytes %->Glucose Segmented neutrophils %->Lymphocyte % Segmented neutrophils %->C-reactive protein Segmented neutrophils %->Urea Segmented neutrophils %->Glucose Hematocrit->Hemoglobin Hematocrit->Glucose Prothrombin time (PT) Prothrombin time (PT) Prothrombin time (PT)->Partial Thromboplastin Time ratio Prothrombin time (PT)->C-reactive protein Prothrombin time (PT)->Systolic blood pressure (daily maximum)
Hyperparameter Value
bn_score holdout-lik
bw_sel scott
epsilon 0.004745700905586502
greedy_arc_remove False
greedy_node_remove False
greedy_prune_cv 5
greedy_prune_impatientTrue
max_indegree 3
max_iters 2147483647
n_jobs -1
num_folds 10
operators ('arcs', 'node_type')
patience 0
random_state 12312548
test_holdout_ratio 0.2
verbose 0
0.7364977629258331

Summary for validation set¶

In [51]:
GR_OLAS = [(1,), (2,), (3,4,5), (6,)]
models = [
    'LogisticRegression', 'DecisionTreeClassifier', 'GradientBoostingClassifier',
    'Naive-Bayes-CLG', 'Naive-Bayes-SP', 'Chow-Liu-TAN-CLG', 'Chow-Liu-TAN-SP',
    '2DB-BNC-CLG', '2DB-BNC-SP', 'Unconstrained-BNC-CLG', 'Unconstrained-BNC-SP'
]
reports = {}
for gr_ola in GR_OLAS:
    r_ola = reports[gr_ola] = {}
    for m in models:
        r_ola[m] = load(generar_id(m, gr_ola))
In [57]:
def mod_brier(metrics):
    metrics['brier_score'] = 1 - metrics['brier_loss']
    metrics.pop('brier_loss')
    return metrics

metrics = {
    gr_ola: {
        m: mod_brier(reports[gr_ola][m]['metrics'])
        for m in models
    }
    for gr_ola in GR_OLAS
}
In [63]:
adapt_metrics = {
    (' '.join(metric.upper().split('_')), 'W ' + (','.join([str(o) for o in gr_ola]))): {
        model: metrics[gr_ola][model][metric]
        for model in models 
    }
    for metric in ['roc', 'brier_score', 'sensitivity', 'specificity', 'accuracy'] for gr_ola in GR_OLAS
}
In [65]:
import matplotlib.cm as cm


tabla = pd.DataFrame(adapt_metrics, )
tabla = tabla.style.set_table_styles(
    [dict(selector='th', props=[('text-align', 'center')])])
tabla.background_gradient(cmap=cm.bwr, )
tabla.format(precision=2)
tabla.set_properties(**{
    'text-align': 'center',
    'border': '0.5px solid;'
})
tabla.set_table_styles([
    {'selector': 'th', 'props': 'border: 1px solid; background-color: whitesmoke;'}
], overwrite=False)
tabla.set_table_styles({
    ('ROC', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}],
    ('BRIER SCORE', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}],
    ('SPECIFICITY', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}],
    ('SENSITIVITY', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}]
}, overwrite=False, axis=0)
tabla
Out[65]:
  ROC BRIER SCORE SENSITIVITY SPECIFICITY ACCURACY
  W 1 W 2 W 3,4,5 W 6 W 1 W 2 W 3,4,5 W 6 W 1 W 2 W 3,4,5 W 6 W 1 W 2 W 3,4,5 W 6 W 1 W 2 W 3,4,5 W 6
LogisticRegression 0.87 0.88 0.82 0.83 0.87 0.91 0.83 0.90 0.60 0.35 0.74 0.14 0.90 0.97 0.73 0.96 0.81 0.87 0.73 0.86
DecisionTreeClassifier 0.80 0.80 0.80 0.78 0.83 0.83 0.81 0.81 0.72 0.89 1.00 0.73 0.82 0.57 0.51 0.79 0.79 0.63 0.56 0.79
GradientBoostingClassifier 0.88 0.88 0.89 0.82 0.88 0.91 0.93 0.90 0.61 0.28 0.15 0.09 0.92 0.98 0.98 0.99 0.82 0.86 0.89 0.88
Naive-Bayes-CLG 0.84 0.85 0.82 0.75 0.80 0.85 0.88 0.83 0.55 0.49 0.37 0.23 0.88 0.90 0.93 0.88 0.78 0.83 0.86 0.80
Naive-Bayes-SP 0.86 0.86 0.82 0.80 0.83 0.87 0.90 0.88 0.59 0.49 0.22 0.27 0.89 0.90 0.94 0.95 0.79 0.83 0.86 0.87
Chow-Liu-TAN-CLG 0.84 0.85 0.80 0.73 0.80 0.85 0.87 0.81 0.55 0.51 0.33 0.23 0.88 0.89 0.92 0.86 0.78 0.82 0.86 0.78
Chow-Liu-TAN-SP 0.86 0.84 0.78 0.74 0.83 0.84 0.89 0.85 0.56 0.46 0.22 0.23 0.90 0.89 0.96 0.92 0.80 0.82 0.88 0.83
2DB-BNC-CLG 0.84 0.84 0.81 0.76 0.80 0.86 0.87 0.85 0.52 0.47 0.33 0.18 0.89 0.90 0.93 0.92 0.77 0.83 0.86 0.83
2DB-BNC-SP 0.83 0.85 0.82 0.75 0.80 0.88 0.89 0.87 0.53 0.37 0.22 0.18 0.89 0.95 0.96 0.93 0.78 0.85 0.88 0.84
Unconstrained-BNC-CLG 0.84 0.82 0.75 0.66 0.81 0.84 0.87 0.84 0.58 0.40 0.26 0.09 0.87 0.90 0.94 0.92 0.78 0.82 0.86 0.82
Unconstrained-BNC-SP 0.85 0.86 0.77 0.73 0.84 0.90 0.91 0.89 0.43 0.37 0.07 0.23 0.94 0.96 0.98 0.95 0.78 0.86 0.88 0.86

Evaluation on final test set¶

Ola 1¶

In [106]:
model = reports[(1,)]['GradientBoostingClassifier']['model']

report, data = model_demonstrate(
    [1], model, None, pd.concat([df_train, df_val], axis=0), df_test, scale=False, shap_explainer=shap.TreeExplainer,
    model_repr=None, n_iter=0, nombre_modelo='GradientBoostingClassifier', es_test=True)
Metric Value
roc 0.889103
brier_loss 0.123066
specificity0.911765
sensitivity0.622951
accuracy 0.822335
Hyperparameter Value
ccp_alpha 0.0
criterion friedman_mse
init
learning_rate 0.05
loss log_loss
max_depth 3
max_features log2
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
n_estimators 150
n_iter_no_change 10
random_state 12312548
subsample 1.0
tol 0.0001
validation_fraction 0.1
verbose 0
warm_start False

Ola 2¶

In [107]:
model = reports[(2,)]['GradientBoostingClassifier']['model']

report, data = model_demonstrate(
    [2], model, None, pd.concat([df_train, df_val], axis=0), df_test, scale=False, shap_explainer=shap.TreeExplainer,
    model_repr=None, n_iter=0, nombre_modelo='GradientBoostingClassifier', es_test=True)
Metric Value
roc 0.881983
brier_loss 0.0889052
specificity0.957317
sensitivity0.483871
accuracy 0.882051
Hyperparameter Value
ccp_alpha 0.0
criterion friedman_mse
init
learning_rate 0.18859591001753956
loss log_loss
max_depth 3
max_features log2
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
n_estimators 64
n_iter_no_change 6
random_state 12312548
subsample 1.0
tol 0.0001
validation_fraction 0.1
verbose 0
warm_start False

Ola 3, 4 y 5¶

In [108]:
model = reports[(3,4,5,)]['GradientBoostingClassifier']['model']

report, data = model_demonstrate(
    [3,4,5], model, None, pd.concat([df_train, df_val], axis=0), df_test, scale=False, shap_explainer=shap.TreeExplainer,
    model_repr=None, n_iter=0, nombre_modelo='GradientBoostingClassifier', es_test=True)
Metric Value
roc 0.868347
brier_loss 0.0862825
specificity0.966387
sensitivity0.133333
accuracy 0.873134
Hyperparameter Value
ccp_alpha 0.0
criterion friedman_mse
init
learning_rate 0.08410300395207498
loss log_loss
max_depth 3
max_features log2
max_leaf_nodes
min_impurity_decrease 0.0
min_samples_leaf 1
min_samples_split 2
min_weight_fraction_leaf0.0
n_estimators 100
n_iter_no_change 8
random_state 12312548
subsample 1.0
tol 0.0001
validation_fraction 0.1
verbose 0
warm_start False

Ola 6¶

In [113]:
model = reports[(6,)]['LogisticRegression']['model']

report, data = model_demonstrate(
    [6], model, None, pd.concat([df_train, df_val], axis=0), df_test, shap_explainer=shap.LinearExplainer,
    model_repr=lr_repr, n_iter=0, nombre_modelo='LogisticRegression', es_test=True)
The max_iter was reached which means the coef_ did not converge
Metric Value
roc 0.747126
brier_loss 0.105356
specificity0.977011
sensitivity0.0833333
accuracy 0.868687
Feature Weight
Age 1.45282
Heart rate (daily maximum) 0.419777
Systolic blood pressure (daily maximum) -0.359161
Mean corpuscular volume 0.292682
Systolic blood pressure (daily minimum) 0.287555
Hematocrit -0.26281
Systolic blood pressure (first measure) -0.256907
Glucose 0.220738
C-reactive protein 0.197807
Monocytes % -0.173081
Oxygen saturation (daily maximum) -0.163244
Hemolysis index 0.161996
Lactate dehydrogenase (LDH) 0.160064
Partial Thromboplastin Time ratio 0.158866
Oxygen saturation (daily minimum) -0.149929
Mean corpuscular hemoglobin concentration (MCHC)-0.141669
Hemoglobin -0.141086
Creatinine 0.133404
Segmented neutrophils % 0.120926
Oxygen saturation (first measure) 0.0912027
Urea 0.068128
Eosinophil % -0.0461896
Heart rate (first measure) 0.0459791
Derived fibrinogen 0.0406181
Prothrombin time (PT) -0.0300428
D-Dimer -0.021472
Activated Partial Thromboplastin Time (aPTT) 0.0191442
Lymphocyte % -0.0172647
Blood urea nitrogen (BUN) -0.00126195
Hyperparameter Value
C 0.6576313755130081
class_weight
dual False
fit_intercept True
intercept_scaling1
l1_ratio 0.27956404472462143
max_iter 100
multi_class auto
n_jobs -1
penalty elasticnet
random_state
solver saga
tol 0.0001
verbose 0
warm_start False

Save plots for BN based¶

In [34]:
 bn_models = ['Naive-Bayes-CLG', 'Naive-Bayes-SP', 'Chow-Liu-TAN-CLG', 'Chow-Liu-TAN-SP',
    '2DB-BNC-CLG', '2DB-BNC-SP', 'Unconstrained-BNC-CLG', 'Unconstrained-BNC-SP']
for gr_olas in GR_OLAS:
    for bn_m in bn_models:
        bn_svg = reports[gr_olas][bn_m]['model'].as_pydot().create_svg().decode('utf-8')
        with open(f'clasificadoresBayesianos/{bn_m}.svg', 'w') as f:
            f.write(bn_svg)
In [41]:
def markov_blanket(bn_model, class_name=translate["EXITUS"]):
    graph = bn_model.bn_.graph()
    children = graph.children(class_name)
    spouses = set()
    for c in children:
        spouses = spouses.union(graph.parents(c))
    spouses.remove(class_name)
    mb = spouses.union(children)
    return list(mb)

for bn_m in ['Unconstrained-BNC-CLG', 'Unconstrained-BNC-SP']:
    for gr_olas in GR_OLAS:
        bn_model = reports[gr_olas][bn_m]['model']
        print(f'Model {bn_m}, wave/s {gr_olas} Markov blanket:')
        print(' - ' + '\n - '.join(markov_blanket(bn_model)))
        print()
Model Unconstrained-BNC-CLG, wave/s (1,) Markov blanket:
 - Calcium
 - International normalized ratio (INR)
 - Oxygen saturation (daily maximum)
 - Blood urea nitrogen (BUN)
 - Partial pressure of oxygen (Blood gas test)
 - Glucose
 - Lactate dehydrogenase (LDH)
 - Heart rate (daily maximum)
 - Oxygen saturation (daily minimum)
 - Age
 - Red Cell Blood Distribution Width (RDW)

Model Unconstrained-BNC-CLG, wave/s (2,) Markov blanket:
 - Urea
 - Hematocrit
 - International normalized ratio (INR)
 - Blood urea nitrogen (BUN)
 - Partial pressure of CO2 (Blood gas test)
 - Oxygen saturation (daily maximum)
 - Partial pressure of oxygen (Blood gas test)
 - Diastolic blood pressure (daily maximum)
 - Estimated glomerular filtration rate (eGFR) ckd-epi
 - D-Dimer
 - Lactate dehydrogenase (LDH)
 - Heart rate (daily maximum)
 - Age
 - Red Cell Blood Distribution Width (RDW)
 - Current bicarbonate (blood gas test)

Model Unconstrained-BNC-CLG, wave/s (3, 4, 5) Markov blanket:
 - Lymphocyte count
 - Lymphocyte %
 - Albumin
 - Body temperaure (daily maximum)
 - Red Cell Blood Distribution Width (RDW)
 - Heart rate (daily minimum)
 - Current bicarbonate (blood gas test)
 - Blood urea nitrogen (BUN)
 - D-Dimer
 - Lactate dehydrogenase (LDH)
 - Heart rate (daily maximum)
 - Age

Model Unconstrained-BNC-CLG, wave/s (6,) Markov blanket:
 - Blood urea nitrogen (BUN)
 - Oxygen saturation (daily maximum)
 - Glucose
 - Hemolysis index
 - Heart rate (first measure)

Model Unconstrained-BNC-SP, wave/s (1,) Markov blanket:
 - Calcium
 - Oxygen saturation (daily maximum)
 - Prothrombin time (PT)
 - Oxygen saturation (daily minimum)
 - Age

Model Unconstrained-BNC-SP, wave/s (2,) Markov blanket:
 - Urea
 - Partial pressure of CO2 (Blood gas test)
 - Estimated glomerular filtration rate (eGFR) ckd-epi
 - Albumin
 - Systolic blood pressure (daily maximum)
 - Oxygen saturation (daily minimum)
 - Total CO2 (blood gas test)
 - International normalized ratio (INR)
 - D-Dimer
 - Heart rate (daily maximum)
 - Age

Model Unconstrained-BNC-SP, wave/s (3, 4, 5) Markov blanket:
 - Age
 - Oxygen saturation (daily minimum)
 - Current bicarbonate (blood gas test)

Model Unconstrained-BNC-SP, wave/s (6,) Markov blanket:
 - Hemoglobin
 - Hematocrit
 - Oxygen saturation (daily maximum)
 - Systolic blood pressure (first measure)
 - Lactate dehydrogenase (LDH)
 - Systolic blood pressure (daily minimum)
 - Prothrombin time (PT)
 - Heart rate (daily maximum)
 - Systolic blood pressure (daily maximum)
 - Oxygen saturation (daily minimum)
 - Heart rate (first measure)
 - Mean corpuscular hemoglobin concentration (MCHC)

In [ ]: